From 6cdb40a19420807268c1c962ef673cb297a3e72e Mon Sep 17 00:00:00 2001 From: pierre Date: Wed, 29 Jun 2011 22:40:01 +0000 Subject: [PATCH] JIT: Add support for memory accesses with scaled index register but without base register This is mostly useful for removing SHLs by constant 1, 2, 3, which this commit implements in one place. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7652 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/x64Emitter.cpp | 21 +++++++++++++------ Source/Core/Common/Src/x64Emitter.h | 10 +++++++++ .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 16 +++++++------- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 8b8085e25b..9acfac5864 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -224,6 +224,12 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, mod = 1; //8-bit displacement } } + else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) + { + SIB = true; + mod = 0; + _offsetOrBaseReg = 5; + } else //if (scale != SCALE_ATREG) { if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :( @@ -275,11 +281,14 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, int ss; switch (scale) { - case 0: _offsetOrBaseReg = 4; ss = 0; break; //RSP - case 1: ss = 0; break; - case 2: ss = 1; break; - case 4: ss = 2; break; - case 8: ss = 3; break; + case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP + case SCALE_1: ss = 0; break; + case SCALE_2: ss = 1; break; + case SCALE_4: ss = 2; break; + case SCALE_8: ss = 3; break; + case SCALE_NOBASE_2: ss = 1; break; + case SCALE_NOBASE_4: ss = 2; break; + case SCALE_NOBASE_8: ss = 3; break; case SCALE_ATREG: ss = 0; break; default: _assert_msg_(DYNA_REC, 0, "Invalid scale for SIB byte"); ss = 0; break; } @@ -290,7 +299,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, { emit->Write8((u8)(s8)(s32)offset); } - else if (mod == 2) //32-bit disp + else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) //32-bit disp { emit->Write32((u32)offset); } diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 522b22b8a1..78c2e9e620 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -83,6 +83,10 @@ enum SCALE_4 = 4, SCALE_8 = 8, SCALE_ATREG = 16, + //SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG + SCALE_NOBASE_2 = 34, + SCALE_NOBASE_4 = 36, + SCALE_NOBASE_8 = 40, SCALE_RIP = 0xFF, SCALE_IMM8 = 0xF0, SCALE_IMM16 = 0xF1, @@ -176,6 +180,12 @@ inline OpArg MDisp(X64Reg value, int offset) { inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) { return OpArg(offset, scale, base, scaled); } +inline OpArg MScaled(X64Reg scaled, int scale, int offset) { + if (scale == SCALE_1) + return OpArg(offset, SCALE_ATREG, scaled); + else + return OpArg(offset, scale | 0x20, INVALID_REG, scaled); +} inline OpArg MRegSum(X64Reg base, X64Reg offset) { return MComplex(base, offset, 1, 0); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index cfd9a6970b..3b14179cd4 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -110,23 +110,22 @@ void Jit64::psq_st(UGeckoInstruction inst) MOVZX(32, 8, EDX, R(AL)); // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register! #ifdef _M_IX86 - int addr_shift = 2; + int addr_scale = SCALE_4; #else - int addr_shift = 3; + int addr_scale = SCALE_8; #endif - SHL(32, R(EDX), Imm8(addr_shift)); if (inst.W) { // One value XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. CVTSD2SS(XMM0, fpr.R(s)); ABI_AlignStack(0); - CALLptr(MDisp(EDX, (u32)(u64)asm_routines.singleStoreQuantized)); + CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized)); ABI_RestoreStack(0); } else { // Pair of values CVTPD2PS(XMM0, fpr.R(s)); ABI_AlignStack(0); - CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized)); + CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.pairedStoreQuantized)); ABI_RestoreStack(0); } gpr.UnlockAll(); @@ -169,14 +168,13 @@ void Jit64::psq_l(UGeckoInstruction inst) MOV(32, gpr.R(inst.RA), R(ECX)); MOVZX(32, 16, EAX, M(((char *)&GQR(inst.I)) + 2)); MOVZX(32, 8, EDX, R(AL)); - // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! (MComplex can do this, no?) #ifdef _M_IX86 - SHL(32, R(EDX), Imm8(2)); + int addr_scale = SCALE_4; #else - SHL(32, R(EDX), Imm8(3)); + int addr_scale = SCALE_8; #endif ABI_AlignStack(0); - CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized)); + CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.pairedLoadQuantized)); ABI_RestoreStack(0); // MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access