sync to dosbox svn

dborth 2009-05-19 08:05:51 +00:00
parent 022766bc73
commit 9e0f047d1b
7 changed files with 1927 additions and 1451 deletions

View File: risc_armv4le-common.h

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2008 The DOSBox Team
* Copyright (C) 2002-2009 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -16,7 +16,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_armv4le-common.h,v 1.2 2008/09/19 16:48:02 c2woody Exp $ */
/* $Id: risc_armv4le-common.h,v 1.3 2009/05/16 21:52:47 c2woody Exp $ */
/* ARMv4 (little endian) backend by M-HT (common data/functions) */
@ -89,8 +89,19 @@ typedef Bit8u HostReg;
static void cache_block_closing(Bit8u* block_start,Bitu block_size) {
#if (__ARM_EABI__)
//flush cache - eabi
register unsigned long _beg __asm ("a1") = (unsigned long)(block_start); // block start
register unsigned long _end __asm ("a2") = (unsigned long)(block_start+block_size); // block end
register unsigned long _flg __asm ("a3") = 0;
register unsigned long _par __asm ("r7") = 0xf0002; // sys_cacheflush
__asm __volatile ("swi 0x0"
: // no outputs
: "r" (_beg), "r" (_end), "r" (_flg), "r" (_par)
);
#else
// GP2X BEGIN
//flush cache
//flush cache - old abi
register unsigned long _beg __asm ("a1") = (unsigned long)(block_start); // block start
register unsigned long _end __asm ("a2") = (unsigned long)(block_start+block_size); // block end
register unsigned long _flg __asm ("a3") = 0;
@ -99,4 +110,5 @@ static void cache_block_closing(Bit8u* block_start,Bitu block_size) {
: "r" (_beg), "r" (_end), "r" (_flg)
);
// GP2X END
#endif
}
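Both paths invoke the ARM-Linux private cache-flush syscall (0x0f0000 + 2); on EABI the number goes in r7, while the legacy ABI traditionally encodes it in the swi immediate. A minimal usage sketch, assuming the cache.pos emit pointer used elsewhere in these files:

/* hypothetical call site: make freshly emitted code visible to the icache */
Bit8u* block_start = cache.pos;            /* first byte of the new block */
/* ... emit instructions via cache_addd() ... */
cache_block_closing(block_start, (Bitu)(cache.pos - block_start));
/* without the flush, the icache may still hold stale data for this range */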

View File: risc_armv4le-o3.h

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2008 The DOSBox Team
* Copyright (C) 2002-2009 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -16,7 +16,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_armv4le-o3.h,v 1.3 2008/09/19 16:48:02 c2woody Exp $ */
/* $Id: risc_armv4le-o3.h,v 1.4 2009/05/16 21:52:47 c2woody Exp $ */
/* ARMv4 (little endian) backend by M-HT (size-tweaked arm version) */
@ -58,13 +58,102 @@
#define FC_SEGS_ADDR HOST_v8
#endif
// helper macro
#define ROTATE_SCALE(x) ( (x)?(32 - x):(0) )
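ARM data-processing immediates are an 8-bit value rotated right by an even amount, so a left shift by scale is expressed as a rotate right by (32 - scale); the macro only converts the shift into that rotate amount. A sanity check of the identity, independent of the emitter:

/* illustration only: ror by ROTATE_SCALE(n) equals lsl by n for 8-bit values */
static Bit32u ror32(Bit32u v, Bitu r) {
	return r ? ((v >> r) | (v << (32 - r))) : v;
}
/* ror32(0xff, ROTATE_SCALE(4)) == 0xff << 4 == 0xff0 */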
// instruction encodings
// move
// mov dst, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define MOV_IMM(dst, imm, rimm) (0xe3a00000 + ((dst) << 12) + (imm) + ((rimm) << 7) )
// mov dst, src, lsl #imm
#define MOV_REG_LSL_IMM(dst, src, imm) (0xe1a00000 + ((dst) << 12) + (src) + ((imm) << 7) )
// movs dst, src, lsl #imm
#define MOVS_REG_LSL_IMM(dst, src, imm) (0xe1b00000 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, lsr #imm
#define MOV_REG_LSR_IMM(dst, src, imm) (0xe1a00020 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, asr #imm
#define MOV_REG_ASR_IMM(dst, src, imm) (0xe1a00040 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, lsl rreg
#define MOV_REG_LSL_REG(dst, src, rreg) (0xe1a00010 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, lsr rreg
#define MOV_REG_LSR_REG(dst, src, rreg) (0xe1a00030 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, asr rreg
#define MOV_REG_ASR_REG(dst, src, rreg) (0xe1a00050 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, ror rreg
#define MOV_REG_ROR_REG(dst, src, rreg) (0xe1a00070 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mvn dst, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define MVN_IMM(dst, imm, rimm) (0xe3e00000 + ((dst) << 12) + (imm) + ((rimm) << 7) )
// arithmetic
// add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// add dst, src1, src2, lsl #imm
#define ADD_REG_LSL_IMM(dst, src1, src2, imm) (0xe0800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// sub dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define SUB_IMM(dst, src, imm, rimm) (0xe2400000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// sub dst, src1, src2, lsl #imm
#define SUB_REG_LSL_IMM(dst, src1, src2, imm) (0xe0400000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// rsb dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define RSB_IMM(dst, src, imm, rimm) (0xe2600000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// cmp src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define CMP_IMM(src, imm, rimm) (0xe3500000 + ((src) << 16) + (imm) + ((rimm) << 7) )
// nop
#define NOP MOV_REG_LSL_IMM(HOST_r0, HOST_r0, 0)
// logical
// tst src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define TST_IMM(src, imm, rimm) (0xe3100000 + ((src) << 16) + (imm) + ((rimm) << 7) )
// and dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define AND_IMM(dst, src, imm, rimm) (0xe2000000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// and dst, src1, src2, lsl #imm
#define AND_REG_LSL_IMM(dst, src1, src2, imm) (0xe0000000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// orr dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define ORR_IMM(dst, src, imm, rimm) (0xe3800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// orr dst, src1, src2, lsl #imm
#define ORR_REG_LSL_IMM(dst, src1, src2, imm) (0xe1800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// orr dst, src1, src2, lsr #imm
#define ORR_REG_LSR_IMM(dst, src1, src2, imm) (0xe1800020 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// eor dst, src1, src2, lsl #imm
#define EOR_REG_LSL_IMM(dst, src1, src2, imm) (0xe0200000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// bic dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define BIC_IMM(dst, src, imm, rimm) (0xe3c00000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// load
// ldr reg, [addr, #imm] @ 0 <= imm < 4096
#define LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// ldrh reg, [addr, #imm] @ 0 <= imm < 256
#define LDRH_IMM(reg, addr, imm) (0xe1d000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
// ldrb reg, [addr, #imm] @ 0 <= imm < 4096
#define LDRB_IMM(reg, addr, imm) (0xe5d00000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// store
// str reg, [addr, #imm] @ 0 <= imm < 4096
#define STR_IMM(reg, addr, imm) (0xe5800000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// strh reg, [addr, #imm] @ 0 <= imm < 256
#define STRH_IMM(reg, addr, imm) (0xe1c000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
// strb reg, [addr, #imm] @ 0 <= imm < 4096
#define STRB_IMM(reg, addr, imm) (0xe5c00000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// branch
// beq pc+imm @ 0 <= imm < 32M & imm mod 4 = 0
#define BEQ_FWD(imm) (0x0a000000 + ((imm) >> 2) )
// bne pc+imm @ 0 <= imm < 32M & imm mod 4 = 0
#define BNE_FWD(imm) (0x1a000000 + ((imm) >> 2) )
// bgt pc+imm @ 0 <= imm < 32M & imm mod 4 = 0
#define BGT_FWD(imm) (0xca000000 + ((imm) >> 2) )
// b pc+imm @ 0 <= imm < 32M & imm mod 4 = 0
#define B_FWD(imm) (0xea000000 + ((imm) >> 2) )
// bx reg
#define BX(reg) (0xe12fff10 + (reg) )
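To see that these templates really assemble the named instructions, two hand-checked expansions (register numbers passed directly, since the macros just add bit fields). Note also that in the *_FWD branch comments, "pc" means the ARM-visible program counter, i.e. the branch's own address plus 8.

/* ADD_REG_LSL_IMM(1, 2, 3, 2):
   0xe0800000 + (1<<12) + (2<<16) + 3 + (2<<7) = 0xe0821103
   which is "add r1, r2, r3, lsl #2"                               */
/* LDRH_IMM(0, 1, 0x34): the 8-bit offset is split into two
   nibbles (bits 11:8 and 3:0), hence the masking in the macro:
   0xe1d000b0 + (0<<12) + (1<<16) + (0x30<<4) + 0x4 = 0xe1d103b4
   which is "ldrh r0, [r1, #0x34]"                                 */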
// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
if(reg_src == reg_dst) return;
cache_addd(0xe1a00000 + (reg_dst << 12) + reg_src); // mov reg_dst, reg_src
cache_addd( MOV_REG_LSL_IMM(reg_dst, reg_src, 0) ); // mov reg_dst, reg_src
}
// helper function
@ -114,9 +203,9 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
Bits first, method, scale;
Bit32u imm2, dist;
if (imm == 0) {
cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0
cache_addd( MOV_IMM(dest_reg, 0, 0) ); // mov dest_reg, #0
} else if (imm == 0xffffffff) {
cache_addd(0xe3e00000 + (dest_reg << 12)); // mvn dest_reg, #0
cache_addd( MVN_IMM(dest_reg, 0, 0) ); // mvn dest_reg, #0
} else {
method = get_method_imm_gen_len(imm, 1, NULL);
@ -130,10 +219,10 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
scale+=2;
}
if (first) {
cache_addd(0xe2400000 + (dest_reg << 12) + (HOST_pc << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // sub dest_reg, pc, #((dist & 0xff) << scale)
cache_addd( SUB_IMM(dest_reg, HOST_pc, dist & 0xff, ROTATE_SCALE(scale)) ); // sub dest_reg, pc, #((dist & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe2400000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // sub dest_reg, dest_reg, #((dist & 0xff) << scale)
cache_addd( SUB_IMM(dest_reg, dest_reg, dist & 0xff, ROTATE_SCALE(scale)) ); // sub dest_reg, dest_reg, #((dist & 0xff) << scale)
}
dist>>=8;
scale+=8;
@ -141,7 +230,7 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
} else if (method == 1) {
dist = imm - ((Bit32u)cache.pos+8);
if (dist == 0) {
cache_addd(0xe1a00000 + (dest_reg << 12) + HOST_pc); // mov dest_reg, pc
cache_addd( MOV_REG_LSL_IMM(dest_reg, HOST_pc, 0) ); // mov dest_reg, pc
} else {
while (dist) {
while ((dist & 3) == 0) {
@ -149,10 +238,10 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
scale+=2;
}
if (first) {
cache_addd(0xe2800000 + (dest_reg << 12) + (HOST_pc << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // add dest_reg, pc, #((dist & 0xff) << scale)
cache_addd( ADD_IMM(dest_reg, HOST_pc, dist & 0xff, ROTATE_SCALE(scale)) ); // add dest_reg, pc, #((dist & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe2800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // add dest_reg, dest_reg, #((dist & 0xff) << scale)
cache_addd( ADD_IMM(dest_reg, dest_reg, dist & 0xff, ROTATE_SCALE(scale)) ); // add dest_reg, dest_reg, #((dist & 0xff) << scale)
}
dist>>=8;
scale+=8;
@ -165,10 +254,10 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
scale+=2;
}
if (first) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // mov dest_reg, #((imm & 0xff) << scale)
cache_addd( MOV_IMM(dest_reg, imm & 0xff, ROTATE_SCALE(scale)) ); // mov dest_reg, #((imm & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // orr dest_reg, dest_reg, #((imm & 0xff) << scale)
cache_addd( ORR_IMM(dest_reg, dest_reg, imm & 0xff, ROTATE_SCALE(scale)) ); // orr dest_reg, dest_reg, #((imm & 0xff) << scale)
}
imm>>=8;
scale+=8;
@ -181,10 +270,10 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
scale+=2;
}
if (first) {
cache_addd(0xe3e00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // mvn dest_reg, #((imm2 & 0xff) << scale)
cache_addd( MVN_IMM(dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // mvn dest_reg, #((imm2 & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe3c00000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic dest_reg, dest_reg, #((imm2 & 0xff) << scale)
cache_addd( BIC_IMM(dest_reg, dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // bic dest_reg, dest_reg, #((imm2 & 0xff) << scale)
}
imm2>>=8;
scale+=8;
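The helper picks among four synthesis strategies (pc-relative sub, pc-relative add, mov+orr chains, mvn+bic chains) via get_method_imm_gen_len, which is not shown in these hunks. As a worked example of the mov/orr path, 0x00ff00ff needs only two instructions, since each step handles one 8-bit chunk at an even rotation:

/* illustrative emission for imm = 0x00ff00ff via the mov/orr path:     */
/*   mov dest_reg, #0x000000ff                                          */
/*   orr dest_reg, dest_reg, #0x00ff0000                                */
/* i.e. cache_addd(MOV_IMM(dest_reg, 0xff, 0));                         */
/*      cache_addd(ORR_IMM(dest_reg, dest_reg, 0xff, ROTATE_SCALE(16))); */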
@ -199,26 +288,26 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho
if (dword) {
if ((Bit32u)data & 3) {
if ( ((Bit32u)data & 3) == 2 ) {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg]
cache_addd(0xe1d000b2 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #2]
cache_addd(0xe1800800 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #16
cache_addd( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg]
cache_addd( LDRH_IMM(temp2, data_reg, 2) ); // ldrh temp2, [data_reg, #2]
cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 16) ); // orr dest_reg, dest_reg, temp2, lsl #16
} else {
cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg]
cache_addd(0xe1d000b1 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #1]
cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8
cache_addd(0xe5d00003 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #3]
cache_addd(0xe1800c00 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #24
cache_addd( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg]
cache_addd( LDRH_IMM(temp2, data_reg, 1) ); // ldrh temp2, [data_reg, #1]
cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) ); // orr dest_reg, dest_reg, temp2, lsl #8
cache_addd( LDRB_IMM(temp2, data_reg, 3) ); // ldrb temp2, [data_reg, #3]
cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 24) ); // orr dest_reg, dest_reg, temp2, lsl #24
}
} else {
cache_addd(0xe5900000 + (dest_reg << 12) + (data_reg << 16)); // ldr dest_reg, [data_reg]
cache_addd( LDR_IMM(dest_reg, data_reg, 0) ); // ldr dest_reg, [data_reg]
}
} else {
if ((Bit32u)data & 1) {
cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg]
cache_addd(0xe5d00001 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #1]
cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8
cache_addd( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg]
cache_addd( LDRB_IMM(temp2, data_reg, 1) ); // ldrb temp2, [data_reg, #1]
cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) ); // orr dest_reg, dest_reg, temp2, lsl #8
} else {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg]
cache_addd( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg]
}
}
}
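ARMv4 has no usable unaligned loads (an unaligned ldr rotates the data instead of fetching the straddling bytes), so the helper stitches a dword together from naturally aligned halfword and byte pieces. A C model of what the emitted sequences compute, for illustration only (little endian, 32-bit pointers as in the source's casts):

static Bit32u model_unaligned_load32(const Bit8u* p) {
	if (((Bit32u)p & 3) == 2) {          /* halfword-aligned case */
		return (Bit32u)((const Bit16u*)p)[0]
		     | ((Bit32u)((const Bit16u*)p)[1] << 16);
	}
	/* odd address: byte + halfword + byte, as in the else branch above */
	return (Bit32u)p[0]
	     | ((Bit32u)*(const Bit16u*)(p + 1) << 8)
	     | ((Bit32u)p[3] << 24);
}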
@ -236,7 +325,7 @@ static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
Bits first, scale;
Bit32u imm2;
if (imm == 0) {
cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0
cache_addd( MOV_IMM(dest_reg, 0, 0) ); // mov dest_reg, #0
} else {
scale = 0;
first = 1;
@ -247,10 +336,10 @@ static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
scale+=2;
}
if (first) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // mov dest_reg, #((imm2 & 0xff) << scale)
cache_addd( MOV_IMM(dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // mov dest_reg, #((imm2 & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // orr dest_reg, dest_reg, #((imm2 & 0xff) << scale)
cache_addd( ORR_IMM(dest_reg, dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // orr dest_reg, dest_reg, #((imm2 & 0xff) << scale)
}
imm2>>=8;
scale+=8;
@ -264,26 +353,26 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword,
if (dword) {
if ((Bit32u)dest & 3) {
if ( ((Bit32u)dest & 3) == 2 ) {
cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg]
cache_addd(0xe1a00820 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #16
cache_addd(0xe1c000b2 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #2]
cache_addd( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg]
cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 16) ); // mov temp2, src_reg, lsr #16
cache_addd( STRH_IMM(temp2, data_reg, 2) ); // strh temp2, [data_reg, #2]
} else {
cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg]
cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8
cache_addd(0xe1c000b1 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #1]
cache_addd(0xe1a00820 + (temp2 << 12) + (temp2)); // mov temp2, temp2, lsr #16
cache_addd(0xe5c00003 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #3]
cache_addd( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg]
cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) ); // mov temp2, src_reg, lsr #8
cache_addd( STRH_IMM(temp2, data_reg, 1) ); // strh temp2, [data_reg, #1]
cache_addd( MOV_REG_LSR_IMM(temp2, temp2, 16) ); // mov temp2, temp2, lsr #16
cache_addd( STRB_IMM(temp2, data_reg, 3) ); // strb temp2, [data_reg, #3]
}
} else {
cache_addd(0xe5800000 + (src_reg << 12) + (data_reg << 16)); // str src_reg, [data_reg]
cache_addd( STR_IMM(src_reg, data_reg, 0) ); // str src_reg, [data_reg]
}
} else {
if ((Bit32u)dest & 1) {
cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg]
cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8
cache_addd(0xe5c00001 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #1]
cache_addd( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg]
cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) ); // mov temp2, src_reg, lsr #8
cache_addd( STRB_IMM(temp2, data_reg, 1) ); // strb temp2, [data_reg, #1]
} else {
cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg]
cache_addd( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg]
}
}
}
@ -300,7 +389,7 @@ static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
cache_addd(0xe5d00000 + (dest_reg << 12) + (temp1 << 16)); // ldrb dest_reg, [temp1]
cache_addd( LDRB_IMM(dest_reg, temp1, 0) ); // ldrb dest_reg, [temp1]
}
// move an 8bit value from memory into dest_reg
@ -316,7 +405,7 @@ static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* dat
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (imm)); // mov dest_reg, #(imm)
cache_addd( MOV_IMM(dest_reg, imm, 0) ); // mov dest_reg, #(imm)
}
// move an 8bit constant value into dest_reg
@ -330,7 +419,7 @@ static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u
// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
cache_addd(0xe5c00000 + (src_reg << 12) + (temp1 << 16)); // strb src_reg, [temp1]
cache_addd( STRB_IMM(src_reg, temp1, 0) ); // strb src_reg, [temp1]
}
@ -339,10 +428,10 @@ static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
if (sign) {
cache_addd(0xe1a00c00 + (reg << 12) + (reg)); // mov reg, reg, lsl #24
cache_addd(0xe1a00c40 + (reg << 12) + (reg)); // mov reg, reg, asr #24
cache_addd( MOV_REG_LSL_IMM(reg, reg, 24) ); // mov reg, reg, lsl #24
cache_addd( MOV_REG_ASR_IMM(reg, reg, 24) ); // mov reg, reg, asr #24
} else {
cache_addd(0xe20000ff + (reg << 12) + (reg << 16)); // and reg, reg, #0xff
cache_addd( AND_IMM(reg, reg, 0xff, 0) ); // and reg, reg, #0xff
}
}
@ -350,18 +439,18 @@ static void gen_extend_byte(bool sign,HostReg reg) {
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
if (sign) {
cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16
cache_addd(0xe1a00840 + (reg << 12) + (reg)); // mov reg, reg, asr #16
cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) ); // mov reg, reg, lsl #16
cache_addd( MOV_REG_ASR_IMM(reg, reg, 16) ); // mov reg, reg, asr #16
} else {
cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16
cache_addd(0xe1a00820 + (reg << 12) + (reg)); // mov reg, reg, lsr #16
cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) ); // mov reg, reg, lsl #16
cache_addd( MOV_REG_LSR_IMM(reg, reg, 16) ); // mov reg, reg, lsr #16
}
}
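Without the sxtb/uxth instructions of later architectures, extension is done with shift pairs. The equivalent C, shown only to spell out the semantics (assumes arithmetic right shift on signed values, as the targeted compilers provide):

static Bit32u model_extend_word(bool sign, Bit32u x) {
	/* mirrors the lsl/asr (sign) or lsl/lsr (zero) pairs emitted above */
	return sign ? (Bit32u)((Bit32s)(x << 16) >> 16) : (x << 16) >> 16;
}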
// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
gen_mov_word_to_reg(temp3, op, 1);
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3
cache_addd( ADD_REG_LSL_IMM(reg, reg, temp3, 0) ); // add reg, reg, temp3
}
// add a 32bit constant value to a full register
@ -369,9 +458,9 @@ static void gen_add_imm(HostReg reg,Bit32u imm) {
Bits method1, method2, num1, num2, scale, sub;
if(!imm) return;
if (imm == 1) {
cache_addd(0xe2800001 + (reg << 12) + (reg << 16)); // add reg, reg, #1
cache_addd( ADD_IMM(reg, reg, 1, 0) ); // add reg, reg, #1
} else if (imm == 0xffffffff) {
cache_addd(0xe2400001 + (reg << 12) + (reg << 16)); // sub reg, reg, #1
cache_addd( SUB_IMM(reg, reg, 1, 0) ); // sub reg, reg, #1
} else {
method1 = get_method_imm_gen_len(imm, 1, &num1);
method2 = get_method_imm_gen_len(-((Bit32s)imm), 1, &num2);
@ -384,9 +473,9 @@ static void gen_add_imm(HostReg reg,Bit32u imm) {
if (method1 != 2) {
gen_mov_dword_to_reg_imm(temp3, imm);
if (sub) {
cache_addd(0xe0400000 + (reg << 12) + (reg << 16) + (temp3)); // sub reg, reg, temp3
cache_addd( SUB_REG_LSL_IMM(reg, reg, temp3, 0) ); // sub reg, reg, temp3
} else {
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3
cache_addd( ADD_REG_LSL_IMM(reg, reg, temp3, 0) ); // add reg, reg, temp3
}
} else {
scale = 0;
@ -396,9 +485,9 @@ static void gen_add_imm(HostReg reg,Bit32u imm) {
scale+=2;
}
if (sub) {
cache_addd(0xe2400000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // sub reg, reg, #((imm & 0xff) << scale)
cache_addd( SUB_IMM(reg, reg, imm & 0xff, ROTATE_SCALE(scale)) ); // sub reg, reg, #((imm & 0xff) << scale)
} else {
cache_addd(0xe2800000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // add reg, reg, #((imm & 0xff) << scale)
cache_addd( ADD_IMM(reg, reg, imm & 0xff, ROTATE_SCALE(scale)) ); // add reg, reg, #((imm & 0xff) << scale)
}
imm>>=8;
scale+=8;
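Because get_method_imm_gen_len is evaluated for both imm and -imm, a constant whose negation encodes more cheaply is presumably emitted as a subtraction. A hypothetical call illustrating the idea:

/* gen_add_imm(reg, 0xffffff00) should emit the single instruction */
/*   sub reg, reg, #0x100                                          */
/* rather than a multi-instruction add chain for 0xffffff00        */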
@ -414,12 +503,12 @@ static void gen_and_imm(HostReg reg,Bit32u imm) {
imm2 = ~imm;
if(!imm2) return;
if (!imm) {
cache_addd(0xe3a00000 + (reg << 12)); // mov reg, #0
cache_addd( MOV_IMM(reg, 0, 0) ); // mov reg, #0
} else {
method = get_method_imm_gen_len(imm, 0, NULL);
if (method != 3) {
gen_mov_dword_to_reg_imm(temp3, imm);
cache_addd(0xe0000000 + (reg << 12) + (reg << 16) + (temp3)); // and reg, reg, temp3
cache_addd( AND_REG_LSL_IMM(reg, reg, temp3, 0) ); // and reg, reg, temp3
} else {
scale = 0;
while (imm2) {
@ -427,7 +516,7 @@ static void gen_and_imm(HostReg reg,Bit32u imm) {
imm2>>=2;
scale+=2;
}
cache_addd(0xe3c00000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic reg, reg, #((imm2 & 0xff) << scale)
cache_addd( BIC_IMM(reg, reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // bic reg, reg, #((imm2 & 0xff) << scale)
imm2>>=8;
scale+=8;
}
@ -453,9 +542,9 @@ static void gen_add_direct_byte(void* dest,Bit8s imm) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1);
if (imm >= 0) {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // add temp3, temp3, #(imm)
cache_addd( ADD_IMM(temp3, temp3, (Bit32s)imm, 0) ); // add temp3, temp3, #(imm)
} else {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // sub temp3, temp3, #(-imm)
cache_addd( SUB_IMM(temp3, temp3, -((Bit32s)imm), 0) ); // sub temp3, temp3, #(-imm)
}
gen_mov_word_from_reg_helper(temp3, dest, 1, temp1);
}
@ -475,7 +564,7 @@ static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
} else {
gen_mov_word_to_reg_imm(temp2, (Bit16u)imm);
}
cache_addd(0xe0800000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // add temp3, temp3, temp2
cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // add temp3, temp3, temp2
gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
}
@ -485,9 +574,9 @@ static void gen_sub_direct_byte(void* dest,Bit8s imm) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1);
if (imm >= 0) {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // sub temp3, temp3, #(imm)
cache_addd( SUB_IMM(temp3, temp3, (Bit32s)imm, 0) ); // sub temp3, temp3, #(imm)
} else {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // add temp3, temp3, #(-imm)
cache_addd( ADD_IMM(temp3, temp3, -((Bit32s)imm), 0) ); // add temp3, temp3, #(-imm)
}
gen_mov_word_from_reg_helper(temp3, dest, 1, temp1);
}
@ -507,7 +596,7 @@ static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
} else {
gen_mov_word_to_reg_imm(temp2, (Bit16u)imm);
}
cache_addd(0xe0400000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // sub temp3, temp3, temp2
cache_addd( SUB_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // sub temp3, temp3, temp2
gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
}
@ -515,7 +604,7 @@ static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
cache_addd(0xe0800000 + (dest_reg << 12) + (dest_reg << 16) + (scale_reg) + (scale << 7)); // add dest_reg, dest_reg, scale_reg, lsl #(scale)
cache_addd( ADD_REG_LSL_IMM(dest_reg, dest_reg, scale_reg, scale) ); // add dest_reg, dest_reg, scale_reg, lsl #(scale)
gen_add_imm(dest_reg, imm);
}
@ -524,18 +613,18 @@ static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits im
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
if (scale) {
cache_addd(0xe1a00000 + (dest_reg << 12) + (dest_reg) + (scale << 7)); // mov dest_reg, dest_reg, lsl #(scale)
cache_addd( MOV_REG_LSL_IMM(dest_reg, dest_reg, scale) ); // mov dest_reg, dest_reg, lsl #(scale)
}
gen_add_imm(dest_reg, imm);
}
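Together the two overloads cover the scaled-index address forms the dynrec core needs for x86 lea. For example (hypothetical operands):

/* dest = dest + index*4 + 0x20, i.e. lea dest, [dest + index*4 + 0x20] */
/*   gen_lea(dest_reg, index_reg, 2, 0x20);                             */
/* expands to: add dest_reg, dest_reg, index_reg, lsl #2                */
/*             add dest_reg, dest_reg, #0x20      (via gen_add_imm)     */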
// generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
cache_addd(0xe5900004 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #4]
cache_addd(0xe2800004 + (HOST_lr << 12) + (HOST_pc << 16)); // add lr, pc, #4
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd( LDR_IMM(temp1, HOST_pc, 4) ); // ldr temp1, [pc, #4]
cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4
cache_addd( BX(temp1) ); // bx temp1
cache_addd((Bit32u)func); // .int func
cache_addd(0xe1a00000 + (FC_RETOP << 12) + HOST_a1); // mov FC_RETOP, a1
cache_addd( MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 0) ); // mov FC_RETOP, a1
}
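Spelling out the pc-relative arithmetic (the ARM pc reads as the current instruction's address plus 8):

/* addr+0:  ldr temp1, [pc, #4]   ; pc = addr+8, loads the word at addr+12 */
/* addr+4:  add lr, pc, #4        ; lr = (addr+12) + 4 = addr+16           */
/* addr+8:  bx  temp1             ; jump to func                           */
/* addr+12: .int func             ; inline literal holding the target      */
/* addr+16: mov FC_RETOP, a1      ; the callee returns here via lr         */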
// generate a call to a function with paramcount parameters
@ -596,9 +685,9 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) {
scale+=2;
}
if (sub) {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // sub temp3, temp3, #((imm2 & 0xff) << scale)
cache_addd( SUB_IMM(temp3, temp3, imm2 & 0xff, ROTATE_SCALE(scale)) ); // sub temp3, temp3, #((imm2 & 0xff) << scale)
} else {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // add temp3, temp3, #((imm2 & 0xff) << scale)
cache_addd( ADD_IMM(temp3, temp3, imm2 & 0xff, ROTATE_SCALE(scale)) ); // add temp3, temp3, #((imm2 & 0xff) << scale)
}
imm2>>=8;
scale+=8;
@ -608,31 +697,31 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) {
#if (1)
// (*ptr) should be word aligned
if ((imm & 0x03) == 0) {
cache_addd(0xe5900000 + (temp1 << 12) + (temp3 << 16)); // ldr temp1, [temp3]
cache_addd( LDR_IMM(temp1, temp3, 0) ); // ldr temp1, [temp3]
} else
#endif
{
cache_addd(0xe5d00000 + (temp1 << 12) + (temp3 << 16)); // ldrb temp1, [temp3]
cache_addd(0xe5d00001 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #1]
cache_addd(0xe1800400 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #8
cache_addd(0xe5d00002 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #2]
cache_addd(0xe1800800 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #16
cache_addd(0xe5d00003 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #3]
cache_addd(0xe1800c00 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #24
cache_addd( LDRB_IMM(temp1, temp3, 0) ); // ldrb temp1, [temp3]
cache_addd( LDRB_IMM(temp2, temp3, 1) ); // ldrb temp2, [temp3, #1]
cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 8) ); // orr temp1, temp1, temp2, lsl #8
cache_addd( LDRB_IMM(temp2, temp3, 2) ); // ldrb temp2, [temp3, #2]
cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 16) ); // orr temp1, temp1, temp2, lsl #16
cache_addd( LDRB_IMM(temp2, temp3, 3) ); // ldrb temp2, [temp3, #3]
cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 24) ); // orr temp1, temp1, temp2, lsl #24
}
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd( BX(temp1) ); // bx temp1
}
// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
if (dword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0
} else {
cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16
cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) ); // movs temp1, reg, lsl #16
}
cache_addd(0x0a000000); // beq j
cache_addd( BEQ_FWD(0) ); // beq j
return ((Bit32u)cache.pos-4);
}
@ -640,11 +729,11 @@ static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
if (dword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0
} else {
cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16
cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) ); // movs temp1, reg, lsl #16
}
cache_addd(0x1a000000); // bne j
cache_addd( BNE_FWD(0) ); // bne j
return ((Bit32u)cache.pos-4);
}
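Both creators return the address of the branch word itself so the caller can patch the displacement once the target is known. A sketch of the intended pairing, assuming gen_fill_branch's patching role from its name and the signature shown below:

/* Bit32u br = gen_create_branch_on_nonzero(reg, true);   */
/* ... emit the code that is skipped when reg is nonzero ... */
/* gen_fill_branch(br);   // patch the beq/bne displacement  */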
@ -663,13 +752,13 @@ static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
// for isdword==false the lowest 8bit of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
if (isdword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0
} else {
cache_addd(0xe31000ff + (reg << 16)); // tst reg, #0xff
cache_addd( TST_IMM(reg, 0xff, 0) ); // tst reg, #0xff
}
cache_addd(0x0a000002); // beq nobranch
cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0]
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd( BEQ_FWD(8) ); // beq nobranch (pc+8)
cache_addd( LDR_IMM(temp1, HOST_pc, 0) ); // ldr temp1, [pc, #0]
cache_addd( BX(temp1) ); // bx temp1
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
@ -677,10 +766,10 @@ static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
// compare 32bit-register against zero and jump if value less/equal than zero
static Bit32u gen_create_branch_long_leqzero(HostReg reg) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd(0xca000002); // bgt nobranch
cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0]
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0
cache_addd( BGT_FWD(8) ); // bgt nobranch (pc+8)
cache_addd( LDR_IMM(temp1, HOST_pc, 0) ); // ldr temp1, [pc, #0]
cache_addd( BX(temp1) ); // bx temp1
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
@ -697,23 +786,26 @@ static void gen_run_code(void) {
cache_addd(0xe92d0df0); // stmfd sp!, {v1-v5,v7,v8}
// adr: 8
cache_addd(0xe5900000 + (FC_SEGS_ADDR << 12) + (HOST_pc << 16) + (64 - (8 + 8))); // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
cache_addd( LDR_IMM(FC_SEGS_ADDR, HOST_pc, 64 - (8 + 8)) ); // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
// adr: 12
cache_addd(0xe5900000 + (FC_REGS_ADDR << 12) + (HOST_pc << 16) + (68 - (12 + 8))); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
cache_addd(0xe28fe004); // add lr, pc, #4
cache_addd( LDR_IMM(FC_REGS_ADDR, HOST_pc, 68 - (12 + 8)) ); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4
cache_addd(0xe92d4000); // stmfd sp!, {lr}
cache_addd(0xe12fff10); // bx r0
cache_addd( BX(HOST_r0) ); // bx r0
cache_addd(0xe8bd0df0); // ldmfd sp!, {v1-v5,v7,v8}
cache_addd(0xe8bd4000); // ldmfd sp!, {lr}
cache_addd(0xe12fff1e); // bx lr
cache_addd( BX(HOST_lr) ); // bx lr
// fill up to 64 bytes
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
// adr: 64
cache_addd((Bit32u)&Segs); // address of "Segs"
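The prologue is padded with NOPs to a fixed 64-byte size so the two pc-relative loads land on a small literal pool right behind it; the odd-looking offset expressions just pre-subtract the pc bias of 8:

/* adr  8: ldr FC_SEGS_ADDR, [pc, #48] ;  8 + 8 + 48 = 64 -> &Segs     */
/* adr 12: ldr FC_REGS_ADDR, [pc, #48] ; 12 + 8 + 48 = 68 -> &cpu_regs */
/* adr 64: .int &Segs                                                  */
/* adr 68: .int &cpu_regs  (presumably emitted just past this hunk)    */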
@ -723,9 +815,9 @@ static void gen_run_code(void) {
// return from a function
static void gen_return_function(void) {
cache_addd(0xe1a00000 + (HOST_a1 << 12) + FC_RETOP); // mov a1, FC_RETOP
cache_addd( MOV_REG_LSL_IMM(HOST_a1, FC_RETOP, 0) ); // mov a1, FC_RETOP
cache_addd(0xe8bd4000); // ldmfd sp!, {lr}
cache_addd(0xe12fff1e); // bx lr
cache_addd( BX(HOST_lr) ); // bx lr
}
#ifdef DRC_FLAGS_INVALIDATION
@ -739,32 +831,32 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_ADDb:
case t_ADDw:
case t_ADDd:
*(Bit32u*)pos=0xe0800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // add FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // add FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_ORb:
case t_ORw:
case t_ORd:
*(Bit32u*)pos=0xe1800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // orr FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // orr FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_ANDb:
case t_ANDw:
case t_ANDd:
*(Bit32u*)pos=0xe0000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // and FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // and FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_SUBb:
case t_SUBw:
case t_SUBd:
*(Bit32u*)pos=0xe0400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // sub FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // sub FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_XORb:
case t_XORw:
case t_XORd:
*(Bit32u*)pos=0xe0200000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // eor FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // eor FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_CMPb:
case t_CMPw:
@ -772,106 +864,106 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_TESTb:
case t_TESTw:
case t_TESTd:
*(Bit32u*)pos=0xea000000 + (3); // b (pc+3*4)
*(Bit32u*)pos=B_FWD(12); // b (pc+3*4)
break;
case t_INCb:
case t_INCw:
case t_INCd:
*(Bit32u*)pos=0xe2800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // add FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=ADD_IMM(FC_RETOP, HOST_a1, 1, 0); // add FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_DECb:
case t_DECw:
case t_DECd:
*(Bit32u*)pos=0xe2400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // sub FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=SUB_IMM(FC_RETOP, HOST_a1, 1, 0); // sub FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_SHLb:
case t_SHLw:
case t_SHLd:
*(Bit32u*)pos=0xe1a00010 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsl a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsl a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_SHRb:
*(Bit32u*)pos=0xe2000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0xff); // and FC_RETOP, a1, #0xff
*(Bit32u*)(pos+4)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+8)=0xe1a00000; // nop
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=AND_IMM(FC_RETOP, HOST_a1, 0xff, 0); // and FC_RETOP, a1, #0xff
*(Bit32u*)(pos+4)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+8)=NOP; // nop
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_SHRw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1a00020 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_SHRd:
*(Bit32u*)pos=0xe1a00030 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsr a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_SARb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (24 << 7); // mov FC_RETOP, FC_RETOP, asr #24
*(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24); // mov FC_RETOP, FC_RETOP, asr #24
*(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_SARw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, asr #16
*(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, asr #16
*(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_SARd:
*(Bit32u*)pos=0xe1a00050 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, asr a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, asr a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_RORb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_RORw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_RORd:
*(Bit32u*)pos=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_ROLb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+12)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+16)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+12)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+16)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2
break;
case t_ROLw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_ROLd:
*(Bit32u*)pos=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+4)=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+8)=0xe1a00000; // nop
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32
*(Bit32u*)(pos+4)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+8)=NOP; // nop
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_NEGb:
case t_NEGw:
case t_NEGd:
*(Bit32u*)pos=0xe2600000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0); // rsb FC_RETOP, a1, #0
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=RSB_IMM(FC_RETOP, HOST_a1, 0, 0); // rsb FC_RETOP, a1, #0
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
default:
*(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func
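Each case overwrites a fixed five-word (20-byte) slot at pos. The short ALU cases write one instruction followed by B_FWD(8): with the branch at pos+4, the target is (pos+4) + 8 + 8 = pos+20, the first word after the slot; the longer shift/rotate sequences instead fill the remainder with NOPs. A sketch of the two shapes:

/* simple case (e.g. t_ADDd):                                   */
/*   pos+0 : add FC_RETOP, a1, a2                               */
/*   pos+4 : b   pos+20          ; skip the three unused words  */
/* five-word case (e.g. t_SHRb):                                */
/*   pos+0 : and FC_RETOP, a1, #0xff                            */
/*   pos+4 : mov FC_RETOP, FC_RETOP, lsr a2                     */
/*   pos+8 .. pos+16 : nop                                      */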
@ -886,24 +978,23 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
static void cache_block_before_close(void) { }
#ifdef DRC_USE_SEGS_ADDR
// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (FC_SEGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // ldrh dest_reg, [FC_SEGS_ADDR, #index]
cache_addd( LDRH_IMM(dest_reg, FC_SEGS_ADDR, index) ); // ldrh dest_reg, [FC_SEGS_ADDR, #index]
}
// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
cache_addd(0xe5900000 + (dest_reg << 12) + (FC_SEGS_ADDR << 16) + (index)); // ldr dest_reg, [FC_SEGS_ADDR, #index]
cache_addd( LDR_IMM(dest_reg, FC_SEGS_ADDR, index) ); // ldr dest_reg, [FC_SEGS_ADDR, #index]
}
// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
cache_addd(0xe5900000 + (temp1 << 12) + (FC_SEGS_ADDR << 16) + (index)); // ldr temp1, [FC_SEGS_ADDR, #index]
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp1)); // add reg, reg, temp1
cache_addd( LDR_IMM(temp1, FC_SEGS_ADDR, index) ); // ldr temp1, [FC_SEGS_ADDR, #index]
cache_addd( ADD_REG_LSL_IMM(reg, reg, temp1, 0) ); // add reg, reg, temp1
}
#endif
@ -913,21 +1004,21 @@ static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // ldrh dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrh dest_reg, [FC_REGS_ADDR, #index]
}
// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
cache_addd(0xe5900000 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // ldr dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldr dest_reg, [FC_REGS_ADDR, #index]
}
// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
if (dword) {
cache_addd(0xe5900000 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // ldr dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldr dest_reg, [FC_REGS_ADDR, #index]
} else {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // ldrh dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrh dest_reg, [FC_REGS_ADDR, #index]
}
}
@ -936,7 +1027,7 @@ static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
cache_addd(0xe5d00000 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // ldrb dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrb dest_reg, [FC_REGS_ADDR, #index]
}
// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
@ -944,39 +1035,39 @@ static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
cache_addd(0xe5d00000 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // ldrb dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrb dest_reg, [FC_REGS_ADDR, #index]
}
// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
cache_addd(0xe5900000 + (temp2 << 12) + (FC_REGS_ADDR << 16) + (index)); // ldr temp2, [FC_REGS_ADDR, #index]
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp2)); // add reg, reg, temp2
cache_addd( LDR_IMM(temp2, FC_REGS_ADDR, index) ); // ldr temp2, [FC_REGS_ADDR, #index]
cache_addd( ADD_REG_LSL_IMM(reg, reg, temp2, 0) ); // add reg, reg, temp2
}
// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
cache_addd(0xe1c000b0 + (src_reg << 12) + (FC_REGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // strh src_reg, [FC_REGS_ADDR, #index]
cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) ); // strh src_reg, [FC_REGS_ADDR, #index]
}
// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
cache_addd(0xe5800000 + (src_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // str src_reg, [FC_REGS_ADDR, #index]
cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) ); // str src_reg, [FC_REGS_ADDR, #index]
}
// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
if (dword) {
cache_addd(0xe5800000 + (src_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // str src_reg, [FC_REGS_ADDR, #index]
cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) ); // str src_reg, [FC_REGS_ADDR, #index]
} else {
cache_addd(0xe1c000b0 + (src_reg << 12) + (FC_REGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // strh src_reg, [FC_REGS_ADDR, #index]
cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) ); // strh src_reg, [FC_REGS_ADDR, #index]
}
}
// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
cache_addd(0xe5c00000 + (src_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // strb src_reg, [FC_REGS_ADDR, #index]
cache_addd( STRB_IMM(src_reg, FC_REGS_ADDR, index) ); // strb src_reg, [FC_REGS_ADDR, #index]
}
#endif

View File: risc_armv4le-s3.h

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2008 The DOSBox Team
* Copyright (C) 2002-2009 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -16,7 +16,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_armv4le-s3.h,v 1.3 2008/09/19 16:48:02 c2woody Exp $ */
/* $Id: risc_armv4le-s3.h,v 1.4 2009/05/16 21:52:47 c2woody Exp $ */
/* ARMv4 (little endian) backend by M-HT (speed-tweaked arm version) */
@ -58,20 +58,109 @@
#define FC_SEGS_ADDR HOST_v8
#endif
// helper macro
#define ROTATE_SCALE(x) ( (x)?(32 - x):(0) )
// instruction encodings
// move
// mov dst, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define MOV_IMM(dst, imm, rimm) (0xe3a00000 + ((dst) << 12) + (imm) + ((rimm) << 7) )
// mov dst, src, lsl #imm
#define MOV_REG_LSL_IMM(dst, src, imm) (0xe1a00000 + ((dst) << 12) + (src) + ((imm) << 7) )
// movs dst, src, lsl #imm
#define MOVS_REG_LSL_IMM(dst, src, imm) (0xe1b00000 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, lsr #imm
#define MOV_REG_LSR_IMM(dst, src, imm) (0xe1a00020 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, asr #imm
#define MOV_REG_ASR_IMM(dst, src, imm) (0xe1a00040 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, lsl rreg
#define MOV_REG_LSL_REG(dst, src, rreg) (0xe1a00010 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, lsr rreg
#define MOV_REG_LSR_REG(dst, src, rreg) (0xe1a00030 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, asr rreg
#define MOV_REG_ASR_REG(dst, src, rreg) (0xe1a00050 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, ror rreg
#define MOV_REG_ROR_REG(dst, src, rreg) (0xe1a00070 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mvn dst, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define MVN_IMM(dst, imm, rimm) (0xe3e00000 + ((dst) << 12) + (imm) + ((rimm) << 7) )
// arithmetic
// add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// add dst, src1, src2, lsl #imm
#define ADD_REG_LSL_IMM(dst, src1, src2, imm) (0xe0800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// sub dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define SUB_IMM(dst, src, imm, rimm) (0xe2400000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// sub dst, src1, src2, lsl #imm
#define SUB_REG_LSL_IMM(dst, src1, src2, imm) (0xe0400000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// rsb dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define RSB_IMM(dst, src, imm, rimm) (0xe2600000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// cmp src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define CMP_IMM(src, imm, rimm) (0xe3500000 + ((src) << 16) + (imm) + ((rimm) << 7) )
// nop
#define NOP MOV_REG_LSL_IMM(HOST_r0, HOST_r0, 0)
// logical
// tst src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define TST_IMM(src, imm, rimm) (0xe3100000 + ((src) << 16) + (imm) + ((rimm) << 7) )
// and dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define AND_IMM(dst, src, imm, rimm) (0xe2000000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// and dst, src1, src2, lsl #imm
#define AND_REG_LSL_IMM(dst, src1, src2, imm) (0xe0000000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// orr dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define ORR_IMM(dst, src, imm, rimm) (0xe3800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// orr dst, src1, src2, lsl #imm
#define ORR_REG_LSL_IMM(dst, src1, src2, imm) (0xe1800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// orr dst, src1, src2, lsr #imm
#define ORR_REG_LSR_IMM(dst, src1, src2, imm) (0xe1800020 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// eor dst, src1, src2, lsl #imm
#define EOR_REG_LSL_IMM(dst, src1, src2, imm) (0xe0200000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// bic dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0
#define BIC_IMM(dst, src, imm, rimm) (0xe3c00000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// load
// ldr reg, [addr, #imm] @ 0 <= imm < 4096
#define LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// ldrh reg, [addr, #imm] @ 0 <= imm < 256
#define LDRH_IMM(reg, addr, imm) (0xe1d000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
// ldrb reg, [addr, #imm] @ 0 <= imm < 4096
#define LDRB_IMM(reg, addr, imm) (0xe5d00000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// store
// str reg, [addr, #imm] @ 0 <= imm < 4096
#define STR_IMM(reg, addr, imm) (0xe5800000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// strh reg, [addr, #imm] @ 0 <= imm < 256
#define STRH_IMM(reg, addr, imm) (0xe1c000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
// strb reg, [addr, #imm] @ 0 <= imm < 4096
#define STRB_IMM(reg, addr, imm) (0xe5c00000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// branch
// beq pc+imm @ 0 <= imm < 32M & imm mod 4 = 0
#define BEQ_FWD(imm) (0x0a000000 + ((imm) >> 2) )
// bne pc+imm @ 0 <= imm < 32M & imm mod 4 = 0
#define BNE_FWD(imm) (0x1a000000 + ((imm) >> 2) )
// bgt pc+imm @ 0 <= imm < 32M & imm mod 4 = 0
#define BGT_FWD(imm) (0xca000000 + ((imm) >> 2) )
// b pc+imm @ 0 <= imm < 32M & imm mod 4 = 0
#define B_FWD(imm) (0xea000000 + ((imm) >> 2) )
// bx reg
#define BX(reg) (0xe12fff10 + (reg) )
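// Note on the *_FWD macros: the offset field is counted in words, and the
// CPU adds it to pc, which on ARM reads as the address of the branch plus 8.
// So B_FWD(imm) lands at branch_address + 8 + imm -- e.g. B_FWD(8) transfers
// control four words past the branch, skipping the three words in between.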
// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
if(reg_src == reg_dst) return;
cache_addd(0xe1a00000 + (reg_dst << 12) + reg_src); // mov reg_dst, reg_src
cache_addd( MOV_REG_LSL_IMM(reg_dst, reg_src, 0) ); // mov reg_dst, reg_src
}
// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
Bits first, scale;
if (imm == 0) {
cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0
cache_addd( MOV_IMM(dest_reg, 0, 0) ); // mov dest_reg, #0
} else {
scale = 0;
first = 1;
@ -81,10 +170,10 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
scale+=2;
}
if (first) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // mov dest_reg, #((imm & 0xff) << scale)
cache_addd( MOV_IMM(dest_reg, imm & 0xff, ROTATE_SCALE(scale)) ); // mov dest_reg, #((imm & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // orr dest_reg, dest_reg, #((imm & 0xff) << scale)
cache_addd( ORR_IMM(dest_reg, dest_reg, imm & 0xff, ROTATE_SCALE(scale)) ); // orr dest_reg, dest_reg, #((imm & 0xff) << scale)
}
imm>>=8;
scale+=8;
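// Worked example of the loop above (illustration only): for
// imm == 0x12345678 the emitted sequence is
//   mov reg, #0x278             @ MOV_IMM(reg, 0x9e, 30)
//   orr reg, reg, #0x5400       @ ORR_IMM(reg, reg, 0x15, 22)
//   orr reg, reg, #0x2340000    @ ORR_IMM(reg, reg, 0x8d, 14)
//   orr reg, reg, #0x10000000   @ ORR_IMM(reg, reg, 0x01, 4)
// one rotated 8-bit chunk per pass, summing to the full constant.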
@ -98,26 +187,26 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho
if (dword) {
if ((Bit32u)data & 3) {
if ( ((Bit32u)data & 3) == 2 ) {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg]
cache_addd(0xe1d000b2 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #2]
cache_addd(0xe1800800 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #16
cache_addd( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg]
cache_addd( LDRH_IMM(temp2, data_reg, 2) ); // ldrh temp2, [data_reg, #2]
cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 16) ); // orr dest_reg, dest_reg, temp2, lsl #16
} else {
cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg]
cache_addd(0xe1d000b1 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #1]
cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8
cache_addd(0xe5d00003 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #3]
cache_addd(0xe1800c00 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #24
cache_addd( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg]
cache_addd( LDRH_IMM(temp2, data_reg, 1) ); // ldrh temp2, [data_reg, #1]
cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) ); // orr dest_reg, dest_reg, temp2, lsl #8
cache_addd( LDRB_IMM(temp2, data_reg, 3) ); // ldrb temp2, [data_reg, #3]
cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 24) ); // orr dest_reg, dest_reg, temp2, lsl #24
}
} else {
cache_addd(0xe5900000 + (dest_reg << 12) + (data_reg << 16)); // ldr dest_reg, [data_reg]
cache_addd( LDR_IMM(dest_reg, data_reg, 0) ); // ldr dest_reg, [data_reg]
}
} else {
if ((Bit32u)data & 1) {
cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg]
cache_addd(0xe5d00001 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #1]
cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8
cache_addd( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg]
cache_addd( LDRB_IMM(temp2, data_reg, 1) ); // ldrb temp2, [data_reg, #1]
cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) ); // orr dest_reg, dest_reg, temp2, lsl #8
} else {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg]
cache_addd( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg]
}
}
}
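// Equivalence sketch (illustration only, not referenced by the emitter) of
// the misaligned dword path above: ARMv4 cannot load across an unaligned
// boundary, so the value is reassembled little-endian from byte/halfword
// loads plus orr-with-shift, as this plain C shows:
static Bit32u read_unaligned_dword(const Bit8u* p) {
	return (Bit32u)p[0] | ((Bit32u)p[1] << 8) |
		((Bit32u)p[2] << 16) | ((Bit32u)p[3] << 24);
}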
@ -141,26 +230,26 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword,
if (dword) {
if ((Bit32u)dest & 3) {
if ( ((Bit32u)dest & 3) == 2 ) {
cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg]
cache_addd(0xe1a00820 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #16
cache_addd(0xe1c000b2 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #2]
cache_addd( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg]
cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 16) ); // mov temp2, src_reg, lsr #16
cache_addd( STRH_IMM(temp2, data_reg, 2) ); // strh temp2, [data_reg, #2]
} else {
cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg]
cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8
cache_addd(0xe1c000b1 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #1]
cache_addd(0xe1a00820 + (temp2 << 12) + (temp2)); // mov temp2, temp2, lsr #16
cache_addd(0xe5c00003 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #3]
cache_addd( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg]
cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) ); // mov temp2, src_reg, lsr #8
cache_addd( STRH_IMM(temp2, data_reg, 1) ); // strh temp2, [data_reg, #1]
cache_addd( MOV_REG_LSR_IMM(temp2, temp2, 16) ); // mov temp2, temp2, lsr #16
cache_addd( STRB_IMM(temp2, data_reg, 3) ); // strb temp2, [data_reg, #3]
}
} else {
cache_addd(0xe5800000 + (src_reg << 12) + (data_reg << 16)); // str src_reg, [data_reg]
cache_addd( STR_IMM(src_reg, data_reg, 0) ); // str src_reg, [data_reg]
}
} else {
if ((Bit32u)dest & 1) {
cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg]
cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8
cache_addd(0xe5c00001 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #1]
cache_addd( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg]
cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) ); // mov temp2, src_reg, lsr #8
cache_addd( STRB_IMM(temp2, data_reg, 1) ); // strb temp2, [data_reg, #1]
} else {
cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg]
cache_addd( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg]
}
}
}
@ -177,7 +266,7 @@ static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
cache_addd(0xe5d00000 + (dest_reg << 12) + (temp1 << 16)); // ldrb dest_reg, [temp1]
cache_addd( LDRB_IMM(dest_reg, temp1, 0) ); // ldrb dest_reg, [temp1]
}
// move an 8bit value from memory into dest_reg
@ -193,7 +282,7 @@ static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* dat
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (imm)); // mov dest_reg, #(imm)
cache_addd( MOV_IMM(dest_reg, imm, 0) ); // mov dest_reg, #(imm)
}
// move an 8bit constant value into dest_reg
@ -207,7 +296,7 @@ static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u
// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
cache_addd(0xe5c00000 + (src_reg << 12) + (temp1 << 16)); // strb src_reg, [temp1]
cache_addd( STRB_IMM(src_reg, temp1, 0) ); // strb src_reg, [temp1]
}
@ -216,10 +305,10 @@ static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
if (sign) {
cache_addd(0xe1a00c00 + (reg << 12) + (reg)); // mov reg, reg, lsl #24
cache_addd(0xe1a00c40 + (reg << 12) + (reg)); // mov reg, reg, asr #24
cache_addd( MOV_REG_LSL_IMM(reg, reg, 24) ); // mov reg, reg, lsl #24
cache_addd( MOV_REG_ASR_IMM(reg, reg, 24) ); // mov reg, reg, asr #24
} else {
cache_addd(0xe20000ff + (reg << 12) + (reg << 16)); // and reg, reg, #0xff
cache_addd( AND_IMM(reg, reg, 0xff, 0) ); // and reg, reg, #0xff
}
}
@ -227,18 +316,18 @@ static void gen_extend_byte(bool sign,HostReg reg) {
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
if (sign) {
cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16
cache_addd(0xe1a00840 + (reg << 12) + (reg)); // mov reg, reg, asr #16
cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) ); // mov reg, reg, lsl #16
cache_addd( MOV_REG_ASR_IMM(reg, reg, 16) ); // mov reg, reg, asr #16
} else {
cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16
cache_addd(0xe1a00820 + (reg << 12) + (reg)); // mov reg, reg, lsr #16
cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) ); // mov reg, reg, lsl #16
cache_addd( MOV_REG_LSR_IMM(reg, reg, 16) ); // mov reg, reg, lsr #16
}
}
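// (ARMv4 has no dedicated extend instructions -- sxth/uxth and friends only
// arrived with ARMv6 -- so both helpers above synthesize the extension from
// a shift pair: lsl to the top of the register, then asr back down for
// sign-extension or lsr for zero-extension.)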
// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
gen_mov_word_to_reg(temp3, op, 1);
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3
cache_addd( ADD_REG_LSL_IMM(reg, reg, temp3, 0) ); // add reg, reg, temp3
}
// add a 32bit constant value to a full register
@ -246,7 +335,7 @@ static void gen_add_imm(HostReg reg,Bit32u imm) {
Bits scale;
if(!imm) return;
if (imm == 0xffffffff) {
cache_addd(0xe2400001 + (reg << 12) + (reg << 16)); // sub reg, reg, #1
cache_addd( SUB_IMM(reg, reg, 1, 0) ); // sub reg, reg, #1
} else {
scale = 0;
while (imm) {
@ -254,7 +343,7 @@ static void gen_add_imm(HostReg reg,Bit32u imm) {
imm>>=2;
scale+=2;
}
cache_addd(0xe2800000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // add reg, reg, #((imm & 0xff) << scale)
cache_addd( ADD_IMM(reg, reg, imm & 0xff, ROTATE_SCALE(scale)) ); // add reg, reg, #((imm & 0xff) << scale)
imm>>=8;
scale+=8;
}
@ -268,7 +357,7 @@ static void gen_and_imm(HostReg reg,Bit32u imm) {
imm2 = ~imm;
if(!imm2) return;
if (!imm) {
cache_addd(0xe3a00000 + (reg << 12)); // mov reg, #0
cache_addd( MOV_IMM(reg, 0, 0) ); // mov reg, #0
} else {
scale = 0;
while (imm2) {
@ -276,7 +365,7 @@ static void gen_and_imm(HostReg reg,Bit32u imm) {
imm2>>=2;
scale+=2;
}
cache_addd(0xe3c00000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic reg, reg, #((imm2 & 0xff) << scale)
cache_addd( BIC_IMM(reg, reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // bic reg, reg, #((imm2 & 0xff) << scale)
imm2>>=8;
scale+=8;
}
@ -301,9 +390,9 @@ static void gen_add_direct_byte(void* dest,Bit8s imm) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1);
if (imm >= 0) {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // add temp3, temp3, #(imm)
cache_addd( ADD_IMM(temp3, temp3, (Bit32s)imm, 0) ); // add temp3, temp3, #(imm)
} else {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // sub temp3, temp3, #(-imm)
cache_addd( SUB_IMM(temp3, temp3, -((Bit32s)imm), 0) ); // sub temp3, temp3, #(-imm)
}
gen_mov_word_from_reg_helper(temp3, dest, 1, temp1);
}
@ -323,7 +412,7 @@ static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
} else {
gen_mov_word_to_reg_imm(temp2, (Bit16u)imm);
}
cache_addd(0xe0800000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // add temp3, temp3, temp2
cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // add temp3, temp3, temp2
gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
}
@ -333,9 +422,9 @@ static void gen_sub_direct_byte(void* dest,Bit8s imm) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1);
if (imm >= 0) {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // sub temp3, temp3, #(imm)
cache_addd( SUB_IMM(temp3, temp3, (Bit32s)imm, 0) ); // sub temp3, temp3, #(imm)
} else {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // add temp3, temp3, #(-imm)
cache_addd( ADD_IMM(temp3, temp3, -((Bit32s)imm), 0) ); // add temp3, temp3, #(-imm)
}
gen_mov_word_from_reg_helper(temp3, dest, 1, temp1);
}
@ -355,7 +444,7 @@ static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
} else {
gen_mov_word_to_reg_imm(temp2, (Bit16u)imm);
}
cache_addd(0xe0400000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // sub temp3, temp3, temp2
cache_addd( SUB_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // sub temp3, temp3, temp2
gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
}
@ -363,7 +452,7 @@ static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
cache_addd(0xe0800000 + (dest_reg << 12) + (dest_reg << 16) + (scale_reg) + (scale << 7)); // add dest_reg, dest_reg, scale_reg, lsl #(scale)
cache_addd( ADD_REG_LSL_IMM(dest_reg, dest_reg, scale_reg, scale) ); // add dest_reg, dest_reg, scale_reg, lsl #(scale)
gen_add_imm(dest_reg, imm);
}
@ -372,18 +461,18 @@ static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits im
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
if (scale) {
cache_addd(0xe1a00000 + (dest_reg << 12) + (dest_reg) + (scale << 7)); // mov dest_reg, dest_reg, lsl #(scale)
cache_addd( MOV_REG_LSL_IMM(dest_reg, dest_reg, scale) ); // mov dest_reg, dest_reg, lsl #(scale)
}
gen_add_imm(dest_reg, imm);
}
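// Usage sketch (register names illustrative, not the dynrec core's actual
// bindings): for an x86 "lea eax, [eax + ecx*4 + 5]" the translator would
// call gen_lea(reg_eax, reg_ecx, 2, 5), emitting
//   add reg_eax, reg_eax, reg_ecx, lsl #2
// followed by the gen_add_imm() sequence for the displacement 5.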
// generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
cache_addd(0xe5900004 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #4]
cache_addd(0xe2800004 + (HOST_lr << 12) + (HOST_pc << 16)); // add lr, pc, #4
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd( LDR_IMM(temp1, HOST_pc, 4) ); // ldr temp1, [pc, #4]
cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4
cache_addd( BX(temp1) ); // bx temp1
cache_addd((Bit32u)func); // .int func
cache_addd(0xe1a00000 + (FC_RETOP << 12) + HOST_a1); // mov FC_RETOP, a1
cache_addd( MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 0) ); // mov FC_RETOP, a1
}
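// Address walkthrough of the sequence above (offsets from the first emitted
// word; pc reads as the current instruction + 8):
//    +0: ldr temp1, [pc, #4]   @ pc == +8, so this loads the word at +12
//    +4: add lr, pc, #4        @ pc == +12, so lr = +16
//    +8: bx temp1              @ jump to func
//   +12: .int func             @ inline literal holding the target
//   +16: mov FC_RETOP, a1      @ func returns here through lr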
// generate a call to a function with paramcount parameters
@ -435,7 +524,7 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) {
imm2>>=2;
scale+=2;
}
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // add temp3, temp3, #((imm2 & 0xff) << scale)
cache_addd( ADD_IMM(temp3, temp3, imm2 & 0xff, ROTATE_SCALE(scale)) ); // add temp3, temp3, #((imm2 & 0xff) << scale)
imm2>>=8;
scale+=8;
}
@ -444,31 +533,31 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) {
#if (1)
// (*ptr) should be word aligned
if ((imm & 0x03) == 0) {
cache_addd(0xe5900000 + (temp1 << 12) + (temp3 << 16)); // ldr temp1, [temp3]
cache_addd( LDR_IMM(temp1, temp3, 0) ); // ldr temp1, [temp3]
} else
#endif
{
cache_addd(0xe5d00000 + (temp1 << 12) + (temp3 << 16)); // ldrb temp1, [temp3]
cache_addd(0xe5d00001 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #1]
cache_addd(0xe1800400 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #8
cache_addd(0xe5d00002 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #2]
cache_addd(0xe1800800 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #16
cache_addd(0xe5d00003 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #3]
cache_addd(0xe1800c00 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #24
cache_addd( LDRB_IMM(temp1, temp3, 0) ); // ldrb temp1, [temp3]
cache_addd( LDRB_IMM(temp2, temp3, 1) ); // ldrb temp2, [temp3, #1]
cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 8) ); // orr temp1, temp1, temp2, lsl #8
cache_addd( LDRB_IMM(temp2, temp3, 2) ); // ldrb temp2, [temp3, #2]
cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 16) ); // orr temp1, temp1, temp2, lsl #16
cache_addd( LDRB_IMM(temp2, temp3, 3) ); // ldrb temp2, [temp3, #3]
cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 24) ); // orr temp1, temp1, temp2, lsl #24
}
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd( BX(temp1) ); // bx temp1
}
// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
if (dword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0
} else {
cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16
cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) ); // movs temp1, reg, lsl #16
}
cache_addd(0x0a000000); // beq j
cache_addd( BEQ_FWD(0) ); // beq j
return ((Bit32u)cache.pos-4);
}
@ -476,11 +565,11 @@ static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
if (dword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0
} else {
cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16
cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) ); // movs temp1, reg, lsl #16
}
cache_addd(0x1a000000); // bne j
cache_addd( BNE_FWD(0) ); // bne j
return ((Bit32u)cache.pos-4);
}
@ -499,13 +588,13 @@ static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
// for isdword==false the lowest 8bit of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
if (isdword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0
} else {
cache_addd(0xe31000ff + (reg << 16)); // tst reg, #0xff
cache_addd( TST_IMM(reg, 0xff, 0) ); // tst reg, #0xff
}
cache_addd(0x0a000002); // beq nobranch
cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0]
cache_addd(0xe12fff10 + (temp1)); // bx temp1
			cache_addd( BEQ_FWD(8) );      // beq nobranch (pc+8)
cache_addd( LDR_IMM(temp1, HOST_pc, 0) ); // ldr temp1, [pc, #0]
cache_addd( BX(temp1) ); // bx temp1
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
@ -513,10 +602,10 @@ static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
// compare 32bit register against zero and jump if value is less than or equal to zero
static Bit32u gen_create_branch_long_leqzero(HostReg reg) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd(0xca000002); // bgt nobranch
cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0]
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0
cache_addd( BGT_FWD(8) ); // bgt nobranch (pc+8)
cache_addd( LDR_IMM(temp1, HOST_pc, 0) ); // ldr temp1, [pc, #0]
cache_addd( BX(temp1) ); // bx temp1
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
@ -533,22 +622,26 @@ static void gen_run_code(void) {
cache_addd(0xe92d0df0); // stmfd sp!, {v1-v5,v7,v8}
// adr: 8
cache_addd(0xe5900000 + (FC_SEGS_ADDR << 12) + (HOST_pc << 16) + (64 - (8 + 8))); // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
cache_addd( LDR_IMM(FC_SEGS_ADDR, HOST_pc, 64 - (8 + 8)) ); // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
// adr: 12
cache_addd(0xe5900000 + (FC_REGS_ADDR << 12) + (HOST_pc << 16) + (68 - (12 + 8))); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
cache_addd(0xe28fe004); // add lr, pc, #4
cache_addd( LDR_IMM(FC_REGS_ADDR, HOST_pc, 68 - (12 + 8)) ); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4
cache_addd(0xe92d4000); // stmfd sp!, {lr}
cache_addd(0xe12fff10); // bx r0
cache_addd( BX(HOST_r0) ); // bx r0
cache_addd(0xe8bd0df0); // ldmfd sp!, {v1-v5,v7,v8}
cache_addd(0xe8bd4000); // ldmfd sp!, {lr}
cache_addd(0xe12fff1e); // bx lr
cache_addd( BX(HOST_lr) ); // bx lr
// fill up to 64 bytes
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd(0xe1a00000); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
cache_addd( NOP ); // nop
// adr: 64
cache_addd((Bit32u)&Segs); // address of "Segs"
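// The literal-pool offsets above follow the same pc+8 rule: the ldr at adr 8
// sees pc == 16 and the "Segs" word sits at adr 64, giving the encoded
// offset 64 - (8 + 8) == 48; likewise 68 - (12 + 8) reaches the "cpu_regs"
// word at adr 68.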
@ -558,9 +651,9 @@ static void gen_run_code(void) {
// return from a function
static void gen_return_function(void) {
cache_addd(0xe1a00000 + (HOST_a1 << 12) + FC_RETOP); // mov a1, FC_RETOP
cache_addd( MOV_REG_LSL_IMM(HOST_a1, FC_RETOP, 0) ); // mov a1, FC_RETOP
cache_addd(0xe8bd4000); // ldmfd sp!, {lr}
cache_addd(0xe12fff1e); // bx lr
cache_addd( BX(HOST_lr) ); // bx lr
}
#ifdef DRC_FLAGS_INVALIDATION
@ -574,32 +667,32 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_ADDb:
case t_ADDw:
case t_ADDd:
*(Bit32u*)pos=0xe0800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // add FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // add FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_ORb:
case t_ORw:
case t_ORd:
*(Bit32u*)pos=0xe1800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // orr FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // orr FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_ANDb:
case t_ANDw:
case t_ANDd:
*(Bit32u*)pos=0xe0000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // and FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // and FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_SUBb:
case t_SUBw:
case t_SUBd:
*(Bit32u*)pos=0xe0400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // sub FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // sub FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_XORb:
case t_XORw:
case t_XORd:
*(Bit32u*)pos=0xe0200000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // eor FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // eor FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_CMPb:
case t_CMPw:
@ -607,106 +700,106 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_TESTb:
case t_TESTw:
case t_TESTd:
*(Bit32u*)pos=0xea000000 + (3); // b (pc+3*4)
*(Bit32u*)pos=B_FWD(12); // b (pc+3*4)
break;
case t_INCb:
case t_INCw:
case t_INCd:
*(Bit32u*)pos=0xe2800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // add FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=ADD_IMM(FC_RETOP, HOST_a1, 1, 0); // add FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_DECb:
case t_DECw:
case t_DECd:
*(Bit32u*)pos=0xe2400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // sub FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=SUB_IMM(FC_RETOP, HOST_a1, 1, 0); // sub FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_SHLb:
case t_SHLw:
case t_SHLd:
*(Bit32u*)pos=0xe1a00010 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsl a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsl a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_SHRb:
*(Bit32u*)pos=0xe2000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0xff); // and FC_RETOP, a1, #0xff
*(Bit32u*)(pos+4)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+8)=0xe1a00000; // nop
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=AND_IMM(FC_RETOP, HOST_a1, 0xff, 0); // and FC_RETOP, a1, #0xff
*(Bit32u*)(pos+4)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+8)=NOP; // nop
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_SHRw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1a00020 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_SHRd:
*(Bit32u*)pos=0xe1a00030 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsr a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_SARb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (24 << 7); // mov FC_RETOP, FC_RETOP, asr #24
*(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24); // mov FC_RETOP, FC_RETOP, asr #24
*(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_SARw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, asr #16
*(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, asr #16
*(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_SARd:
*(Bit32u*)pos=0xe1a00050 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, asr a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, asr a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_RORb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_RORw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_RORd:
*(Bit32u*)pos=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
case t_ROLb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+12)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+16)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+12)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+16)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2
break;
case t_ROLw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_ROLd:
*(Bit32u*)pos=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+4)=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+8)=0xe1a00000; // nop
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
*(Bit32u*)pos=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32
*(Bit32u*)(pos+4)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+8)=NOP; // nop
*(Bit32u*)(pos+12)=NOP; // nop
*(Bit32u*)(pos+16)=NOP; // nop
break;
case t_NEGb:
case t_NEGw:
case t_NEGd:
*(Bit32u*)pos=0xe2600000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0); // rsb FC_RETOP, a1, #0
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
*(Bit32u*)pos=RSB_IMM(FC_RETOP, HOST_a1, 0, 0); // rsb FC_RETOP, a1, #0
*(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4)
break;
default:
*(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func
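// Layout note: every case above patches a fixed five-word slot at pos (the
// generic flag-helper call emitted earlier). Helpers shorter than five words
// either branch to the end of the slot (B_FWD(8) and B_FWD(12) both land at
// pos+20) or pad with NOPs; the default case keeps the call sequence and
// merely rewrites the function-pointer literal at pos+12.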
@ -721,24 +814,23 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
static void cache_block_before_close(void) { }
#ifdef DRC_USE_SEGS_ADDR
// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (FC_SEGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // ldrh dest_reg, [FC_SEGS_ADDR, #index]
cache_addd( LDRH_IMM(dest_reg, FC_SEGS_ADDR, index) ); // ldrh dest_reg, [FC_SEGS_ADDR, #index]
}
// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
cache_addd(0xe5900000 + (dest_reg << 12) + (FC_SEGS_ADDR << 16) + (index)); // ldr dest_reg, [FC_SEGS_ADDR, #index]
cache_addd( LDR_IMM(dest_reg, FC_SEGS_ADDR, index) ); // ldr dest_reg, [FC_SEGS_ADDR, #index]
}
// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
cache_addd(0xe5900000 + (temp1 << 12) + (FC_SEGS_ADDR << 16) + (index)); // ldr temp1, [FC_SEGS_ADDR, #index]
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp1)); // add reg, reg, temp1
cache_addd( LDR_IMM(temp1, FC_SEGS_ADDR, index) ); // ldr temp1, [FC_SEGS_ADDR, #index]
cache_addd( ADD_REG_LSL_IMM(reg, reg, temp1, 0) ); // add reg, reg, temp1
}
#endif
@ -748,21 +840,21 @@ static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // ldrh dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrh dest_reg, [FC_REGS_ADDR, #index]
}
// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
cache_addd(0xe5900000 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // ldr dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldr dest_reg, [FC_REGS_ADDR, #index]
}
// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
if (dword) {
cache_addd(0xe5900000 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // ldr dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldr dest_reg, [FC_REGS_ADDR, #index]
} else {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // ldrh dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrh dest_reg, [FC_REGS_ADDR, #index]
}
}
@ -771,7 +863,7 @@ static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
cache_addd(0xe5d00000 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // ldrb dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrb dest_reg, [FC_REGS_ADDR, #index]
}
// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
@ -779,39 +871,39 @@ static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
cache_addd(0xe5d00000 + (dest_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // ldrb dest_reg, [FC_REGS_ADDR, #index]
cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrb dest_reg, [FC_REGS_ADDR, #index]
}
// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
cache_addd(0xe5900000 + (temp2 << 12) + (FC_REGS_ADDR << 16) + (index)); // ldr temp2, [FC_REGS_ADDR, #index]
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp2)); // add reg, reg, temp2
cache_addd( LDR_IMM(temp2, FC_REGS_ADDR, index) ); // ldr temp2, [FC_REGS_ADDR, #index]
cache_addd( ADD_REG_LSL_IMM(reg, reg, temp2, 0) ); // add reg, reg, temp2
}
// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
cache_addd(0xe1c000b0 + (src_reg << 12) + (FC_REGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // strh src_reg, [FC_REGS_ADDR, #index]
cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) ); // strh src_reg, [FC_REGS_ADDR, #index]
}
// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
cache_addd(0xe5800000 + (src_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // str src_reg, [FC_REGS_ADDR, #index]
cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) ); // str src_reg, [FC_REGS_ADDR, #index]
}
// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
if (dword) {
cache_addd(0xe5800000 + (src_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // str src_reg, [FC_REGS_ADDR, #index]
cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) ); // str src_reg, [FC_REGS_ADDR, #index]
} else {
cache_addd(0xe1c000b0 + (src_reg << 12) + (FC_REGS_ADDR << 16) + ((index & 0xf0) << 4) + (index & 0x0f)); // strh src_reg, [FC_REGS_ADDR, #index]
cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) ); // strh src_reg, [FC_REGS_ADDR, #index]
}
}
// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
cache_addd(0xe5c00000 + (src_reg << 12) + (FC_REGS_ADDR << 16) + (index)); // strb src_reg, [FC_REGS_ADDR, #index]
cache_addd( STRB_IMM(src_reg, FC_REGS_ADDR, index) ); // strb src_reg, [FC_REGS_ADDR, #index]
}
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2007 The DOSBox Team
* Copyright (C) 2002-2009 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -22,6 +22,10 @@
#include <stdio.h>
#include <stdarg.h>
#ifndef min
#define min(a,b) ((a)<(b)?(a):(b))
#endif
/*
Have to remember where i ripped this code sometime ago.