mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-10 08:09:26 +01:00
JIT compiler:
* Improved constant folding in load/store instructions. * Merged load instructions. This is almost the same commit as r6076/r6077, but the x64 build has been fixed. Thanks a lot to skidau and BHaaL!! git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6120 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
430380eac6
commit
cf5088c37e
@ -127,12 +127,20 @@ void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2
|
||||
void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
|
||||
{
|
||||
ABI_AlignStack(2 * 4);
|
||||
PUSH(32, arg1);
|
||||
PUSH(32, Imm32(param2));
|
||||
PUSH(32, arg1);
|
||||
CALL(func);
|
||||
ABI_RestoreStack(2 * 4);
|
||||
}
|
||||
|
||||
// Emits code that calls `func` with a single argument taken from `arg1`.
// The argument is passed on the stack: the stack is aligned for one 4-byte
// argument, `arg1` is pushed as a 32-bit operand, the call is emitted, and
// the stack adjustment is then undone.
void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
|
||||
{
|
||||
// Reserve/align for exactly one 4-byte argument.
ABI_AlignStack(1 * 4);
|
||||
PUSH(32, arg1);
|
||||
CALL(func);
|
||||
// Must mirror the size passed to ABI_AlignStack above.
ABI_RestoreStack(1 * 4);
|
||||
}
|
||||
|
||||
void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
|
||||
// Note: 4 * 4 = 16 bytes, so alignment is preserved.
|
||||
PUSH(EBP);
|
||||
@ -259,6 +267,13 @@ void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2
|
||||
CALL(func);
|
||||
}
|
||||
|
||||
// Emits code that calls `func` with a single argument taken from `arg1`.
// The argument is passed in the ABI_PARAM1 register as a 32-bit value.
void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1)
|
||||
{
|
||||
// Skip the move when arg1 already is the ABI_PARAM1 register.
if (!arg1.IsSimpleReg(ABI_PARAM1))
|
||||
MOV(32, R(ABI_PARAM1), arg1);
|
||||
CALL(func);
|
||||
}
|
||||
|
||||
// Returns the frame size to use for a requested `frameSize`.
// This variant applies no adjustment and returns the value unchanged.
unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
|
||||
return frameSize;
|
||||
}
|
||||
|
@ -600,6 +600,7 @@ public:
|
||||
void ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3);
|
||||
void ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3);
|
||||
void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
|
||||
void ABI_CallFunctionA(void *func, const Gen::OpArg &arg1);
|
||||
|
||||
// Pass a register as a parameter.
|
||||
void ABI_CallFunctionR(void *func, Gen::X64Reg reg1);
|
||||
|
@ -236,8 +236,6 @@ public:
|
||||
void fmaddXX(UGeckoInstruction inst);
|
||||
void fsign(UGeckoInstruction inst);
|
||||
void stX(UGeckoInstruction inst); //stw sth stb
|
||||
void lXz(UGeckoInstruction inst);
|
||||
void lha(UGeckoInstruction inst);
|
||||
void rlwinmx(UGeckoInstruction inst);
|
||||
void rlwimix(UGeckoInstruction inst);
|
||||
void rlwnmx(UGeckoInstruction inst);
|
||||
@ -254,12 +252,8 @@ public:
|
||||
void subfmex(UGeckoInstruction inst);
|
||||
void subfzex(UGeckoInstruction inst);
|
||||
|
||||
void lbzx(UGeckoInstruction inst);
|
||||
void lwzx(UGeckoInstruction inst);
|
||||
void lhax(UGeckoInstruction inst);
|
||||
void lXXx(UGeckoInstruction inst);
|
||||
|
||||
void lwzux(UGeckoInstruction inst);
|
||||
|
||||
void stXx(UGeckoInstruction inst);
|
||||
|
||||
void lmw(UGeckoInstruction inst);
|
||||
|
@ -77,14 +77,14 @@ static GekkoOPTemplate primarytable[] =
|
||||
{28, &Jit64::reg_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
|
||||
{29, &Jit64::reg_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
|
||||
|
||||
{32, &Jit64::lXz}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
|
||||
{33, &Jit64::Default}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
|
||||
{34, &Jit64::lXz}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
|
||||
{35, &Jit64::Default}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
|
||||
{40, &Jit64::lXz}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
|
||||
{41, &Jit64::Default}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
|
||||
{42, &Jit64::lha}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
|
||||
{43, &Jit64::Default}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
|
||||
{32, &Jit64::lXXx}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
|
||||
{33, &Jit64::lXXx}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
|
||||
{34, &Jit64::lXXx}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
|
||||
{35, &Jit64::lXXx}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
|
||||
{40, &Jit64::lXXx}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
|
||||
{41, &Jit64::lXXx}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
|
||||
{42, &Jit64::lXXx}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
|
||||
{43, &Jit64::lXXx}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
|
||||
|
||||
{44, &Jit64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
|
||||
{45, &Jit64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
|
||||
@ -220,20 +220,20 @@ static GekkoOPTemplate table31[] =
|
||||
{1014, &Jit64::dcbz}, //"dcbz", OPTYPE_DCACHE, 0, 4}},
|
||||
|
||||
//load word
|
||||
{23, &Jit64::lwzx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
{55, &Jit64::lwzux}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||
{23, &Jit64::lXXx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
{55, &Jit64::lXXx}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||
|
||||
//load halfword
|
||||
{279, &Jit64::Default}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
{311, &Jit64::Default}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||
{279, &Jit64::lXXx}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
{311, &Jit64::lXXx}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||
|
||||
//load halfword signextend
|
||||
{343, &Jit64::lhax}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
{375, &Jit64::Default}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||
{343, &Jit64::lXXx}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
{375, &Jit64::lXXx}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||
|
||||
//load byte
|
||||
{87, &Jit64::lbzx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
{119, &Jit64::Default}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||
{87, &Jit64::lXXx}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
{119, &Jit64::lXXx}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||
|
||||
//load byte reverse
|
||||
{534, &Jit64::Default}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
|
||||
|
@ -34,96 +34,85 @@
|
||||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
void Jit64::lbzx(UGeckoInstruction inst)
|
||||
void Jit64::lXXx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
|
||||
// Skip disabled JIT instructions
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff && (inst.OPCD == 31) && (inst.SUBOP10 == 87))
|
||||
{ Default(inst); return; }
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff && ((inst.OPCD == 34) || (inst.OPCD == 40) || (inst.OPCD == 32)))
|
||||
{ Default(inst); return; }
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff && (inst.OPCD == 32))
|
||||
{ Default(inst); return; }
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
// Determine memory access size and sign extend
|
||||
int accessSize;
|
||||
bool signExtend;
|
||||
switch (inst.OPCD)
|
||||
{
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
case 32: /* lwz */
|
||||
case 33: /* lwzu */
|
||||
accessSize = 32;
|
||||
signExtend = false;
|
||||
break;
|
||||
|
||||
case 34: /* lbz */
|
||||
case 35: /* lbzu */
|
||||
accessSize = 8;
|
||||
signExtend = false;
|
||||
break;
|
||||
|
||||
case 40: /* lhz */
|
||||
case 41: /* lhzu */
|
||||
accessSize = 16;
|
||||
signExtend = false;
|
||||
break;
|
||||
|
||||
case 42: /* lha */
|
||||
case 43: /* lhau */
|
||||
accessSize = 16;
|
||||
signExtend = true;
|
||||
break;
|
||||
|
||||
case 31:
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 23: /* lwzx */
|
||||
case 55: /* lwzux */
|
||||
accessSize = 32;
|
||||
signExtend = false;
|
||||
break;
|
||||
|
||||
case 87: /* lbzx */
|
||||
case 119: /* lbzux */
|
||||
accessSize = 8;
|
||||
signExtend = false;
|
||||
break;
|
||||
case 279: /* lhzx */
|
||||
case 311: /* lhzux */
|
||||
accessSize = 16;
|
||||
signExtend = false;
|
||||
break;
|
||||
|
||||
case 343: /* lhax */
|
||||
case 375: /* lhaux */
|
||||
accessSize = 16;
|
||||
signExtend = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
PanicAlert("Invalid instruction");
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
PanicAlert("Invalid instruction");
|
||||
}
|
||||
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
gpr.KillImmediate(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
MEMCHECK_END
|
||||
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::lhax(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
{
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
}
|
||||
|
||||
// Some homebrew actually loads from a hw reg with this instruction
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
gpr.KillImmediate(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
MEMCHECK_END
|
||||
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::lwzx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
{
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
}
|
||||
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
gpr.KillImmediate(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
MEMCHECK_END
|
||||
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::lXz(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
|
||||
{ Default(inst); return; }
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
|
||||
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
||||
// Will give nice boost to dual core mode
|
||||
// (mb2): I agree,
|
||||
@ -144,20 +133,17 @@ void Jit64::lXz(UGeckoInstruction inst)
|
||||
|
||||
// do our job at first
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
gpr.Lock(d);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
|
||||
SafeLoadToEAX(gpr.R(a), accessSize, offset, signExtend);
|
||||
gpr.KillImmediate(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
|
||||
// if it's still 0, we can wait until the next event
|
||||
CMP(32, R(RAX), Imm32(0));
|
||||
FixupBranch noIdle = J_CC(CC_NE);
|
||||
TEST(32, R(EAX), R(EAX));
|
||||
FixupBranch noIdle = J_CC(CC_NZ);
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
fpr.Flush(FLUSH_ALL);
|
||||
@ -172,110 +158,81 @@ void Jit64::lXz(UGeckoInstruction inst)
|
||||
//js.compilerPC += 8;
|
||||
return;
|
||||
}
|
||||
|
||||
// R2 always points to the small read-only data area. We could bake R2-relative loads into immediates.
|
||||
// R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode.
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
if (!a)
|
||||
|
||||
// Determine whether this instruction updates inst.RA
|
||||
bool update;
|
||||
if (inst.OPCD == 31)
|
||||
update = ((inst.SUBOP10 & 0x20) != 0);
|
||||
else
|
||||
update = ((inst.OPCD & 1) != 0);
|
||||
|
||||
// Prepare address operand
|
||||
Gen::OpArg opAddress;
|
||||
if (!update && !a)
|
||||
{
|
||||
Default(inst);
|
||||
return;
|
||||
if (inst.OPCD == 31)
|
||||
{
|
||||
gpr.Lock(b);
|
||||
opAddress = gpr.R(b);
|
||||
}
|
||||
else
|
||||
{
|
||||
opAddress = Imm32((u32)(s32)inst.SIMM_16);
|
||||
}
|
||||
}
|
||||
|
||||
int accessSize;
|
||||
switch (inst.OPCD)
|
||||
else if (update && ((a == 0) || (d == a)))
|
||||
{
|
||||
case 32:
|
||||
accessSize = 32;
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;}
|
||||
break; //lwz
|
||||
case 40: accessSize = 16; break; //lhz
|
||||
case 34: accessSize = 8; break; //lbz
|
||||
default:
|
||||
//_assert_msg_(DYNA_REC, 0, "lXz: invalid access size");
|
||||
PanicAlert("lXz: invalid access size");
|
||||
return;
|
||||
}
|
||||
|
||||
if (accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU)
|
||||
{
|
||||
// Fast and daring
|
||||
gpr.Lock(a, d);
|
||||
gpr.BindToRegister(a, true, false);
|
||||
gpr.BindToRegister(d, a == d, true);
|
||||
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
|
||||
BSWAP(32, gpr.R(d).GetSimpleReg());
|
||||
gpr.UnlockAll();
|
||||
PanicAlert("Invalid instruction");
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
gpr.Lock(a);
|
||||
gpr.BindToRegister(a, true, false);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
gpr.KillImmediate(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
MEMCHECK_END
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
if ((inst.OPCD != 31) && gpr.R(a).IsImm())
|
||||
{
|
||||
opAddress = Imm32((u32)gpr.R(a).offset + (s32)inst.SIMM_16);
|
||||
}
|
||||
else if ((inst.OPCD == 31) && gpr.R(a).IsImm() && gpr.R(b).IsImm())
|
||||
{
|
||||
opAddress = Imm32((u32)gpr.R(a).offset + (u32)gpr.R(b).offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
opAddress = R(ABI_PARAM1);
|
||||
MOV(32, opAddress, gpr.R(a));
|
||||
|
||||
if (inst.OPCD == 31)
|
||||
ADD(32, opAddress, gpr.R(b));
|
||||
else
|
||||
ADD(32, opAddress, Imm32((u32)(s32)inst.SIMM_16));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::lha(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
SafeLoadToEAX(opAddress, accessSize, 0, signExtend);
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
// Safe and boring
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
gpr.KillImmediate(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
MEMCHECK_END
|
||||
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::lwzux(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
if (!a || a == d || a == b)
|
||||
// We must flush immediate values from the following registers because
|
||||
// they may change at runtime if no MMU exception has been raised
|
||||
gpr.KillImmediate(d, true, true);
|
||||
if (update)
|
||||
{
|
||||
Default(inst);
|
||||
return;
|
||||
gpr.Lock(a);
|
||||
gpr.BindToRegister(a, true, true);
|
||||
}
|
||||
gpr.Lock(a);
|
||||
gpr.BindToRegister(a, true, true);
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
SafeLoadRegToEAX(EAX, 32, 0, false);
|
||||
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
gpr.KillImmediate(d, false, true);
|
||||
if (update)
|
||||
{
|
||||
if (inst.OPCD == 31)
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
else
|
||||
ADD(32, gpr.R(a), Imm32((u32)(s32)inst.SIMM_16));
|
||||
}
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
MEMCHECK_END
|
||||
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
// Zero cache line.
|
||||
@ -312,7 +269,7 @@ void Jit64::stX(UGeckoInstruction inst)
|
||||
bool update = inst.OPCD & 1;
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
if (a || update)
|
||||
if (a || !update)
|
||||
{
|
||||
int accessSize;
|
||||
switch (inst.OPCD & ~1)
|
||||
@ -323,18 +280,18 @@ void Jit64::stX(UGeckoInstruction inst)
|
||||
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
|
||||
}
|
||||
|
||||
if (gpr.R(a).IsImm())
|
||||
if ((a == 0) || gpr.R(a).IsImm())
|
||||
{
|
||||
// If we already know the address through constant folding, we can do some
|
||||
// fun tricks...
|
||||
u32 addr = (u32)gpr.R(a).offset;
|
||||
u32 addr = ((a == 0) ? 0 : (u32)gpr.R(a).offset);
|
||||
addr += offset;
|
||||
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
|
||||
{
|
||||
if (offset && update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
switch (accessSize)
|
||||
{
|
||||
// No need to protect these, they don't touch any state
|
||||
@ -347,16 +304,27 @@ void Jit64::stX(UGeckoInstruction inst)
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
else if (Memory::IsRAMAddress(addr) && accessSize == 32)
|
||||
else if (Memory::IsRAMAddress(addr))
|
||||
{
|
||||
if (offset && update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
MOV(accessSize, R(EAX), gpr.R(s));
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
BSWAP(accessSize, EAX);
|
||||
WriteToConstRamAddress(accessSize, R(EAX), addr);
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (accessSize)
|
||||
{
|
||||
case 32: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), gpr.R(s), addr); break;
|
||||
case 16: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), gpr.R(s), addr); break;
|
||||
case 8: ABI_CallFunctionAC(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), gpr.R(s), addr); break;
|
||||
}
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
return;
|
||||
}
|
||||
// Other IO not worth the trouble.
|
||||
}
|
||||
|
||||
// Optimized stack access?
|
||||
@ -368,11 +336,11 @@ void Jit64::stX(UGeckoInstruction inst)
|
||||
BSWAP(32, EAX);
|
||||
#ifdef _M_X64
|
||||
MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX));
|
||||
#elif _M_IX86
|
||||
#else
|
||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
|
||||
#endif
|
||||
if (update)
|
||||
if (update && offset)
|
||||
{
|
||||
gpr.Lock(a);
|
||||
gpr.KillImmediate(a, true, true);
|
||||
@ -406,9 +374,9 @@ void Jit64::stX(UGeckoInstruction inst)
|
||||
|
||||
if (update && offset)
|
||||
{
|
||||
gpr.KillImmediate(a, true, true);
|
||||
MEMCHECK_START
|
||||
|
||||
gpr.KillImmediate(a, true, true);
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
|
||||
MEMCHECK_END
|
||||
@ -419,7 +387,7 @@ void Jit64::stX(UGeckoInstruction inst)
|
||||
}
|
||||
else
|
||||
{
|
||||
Default(inst);
|
||||
PanicAlert("Invalid stX");
|
||||
}
|
||||
}
|
||||
|
||||
@ -470,9 +438,7 @@ void Jit64::stXx(UGeckoInstruction inst)
|
||||
// A few games use these heavily in video codecs.
|
||||
void Jit64::lmw(UGeckoInstruction inst)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
Default(inst); return;
|
||||
#else
|
||||
#ifdef _M_X64
|
||||
gpr.FlushLockX(ECX);
|
||||
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
|
||||
if (inst.RA)
|
||||
@ -485,14 +451,14 @@ void Jit64::lmw(UGeckoInstruction inst)
|
||||
MOV(32, gpr.R(i), R(ECX));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
#else
|
||||
Default(inst); return;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Jit64::stmw(UGeckoInstruction inst)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
Default(inst); return;
|
||||
#else
|
||||
#ifdef _M_X64
|
||||
gpr.FlushLockX(ECX);
|
||||
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
|
||||
if (inst.RA)
|
||||
@ -504,6 +470,8 @@ void Jit64::stmw(UGeckoInstruction inst)
|
||||
MOV(32, MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4), R(ECX));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
#else
|
||||
Default(inst); return;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -62,15 +62,13 @@ void Jit64::lfs(UGeckoInstruction inst)
|
||||
return;
|
||||
}
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
if (jo.assumeFPLoadFromMem)
|
||||
{
|
||||
UnsafeLoadRegToReg(ABI_PARAM1, EAX, 32, offset, false);
|
||||
UnsafeLoadToEAX(gpr.R(a), 32, offset, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
|
||||
SafeLoadToEAX(gpr.R(a), 32, offset, false);
|
||||
}
|
||||
|
||||
MEMCHECK_START
|
||||
@ -83,7 +81,6 @@ void Jit64::lfs(UGeckoInstruction inst)
|
||||
|
||||
MEMCHECK_END
|
||||
|
||||
gpr.UnlockAllX();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
@ -299,9 +296,12 @@ void Jit64::stfs(UGeckoInstruction inst)
|
||||
ADD(32, R(ABI_PARAM2), Imm32(offset));
|
||||
if (update && offset)
|
||||
{
|
||||
// We must flush immediate values from the following register because
|
||||
// it may take another value at runtime if no MMU exception has been raised
|
||||
gpr.KillImmediate(a, true, true);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
gpr.KillImmediate(a, false, true);
|
||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
||||
|
||||
MEMCHECK_END
|
||||
@ -362,7 +362,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
|
||||
|
||||
MEMCHECK_END
|
||||
} else {
|
||||
SafeLoadRegToEAX(EAX, 32, false);
|
||||
SafeLoadToEAX(R(EAX), 32, 0, false);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
|
@ -36,11 +36,11 @@ static u32 GC_ALIGNED16(float_buffer);
|
||||
|
||||
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
#ifdef _M_X64
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
|
||||
#else
|
||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
|
||||
#else
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
|
||||
#endif
|
||||
if (accessSize == 32)
|
||||
{
|
||||
@ -63,52 +63,149 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc
|
||||
|
||||
void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
#ifdef _M_X64
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
|
||||
#else
|
||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
|
||||
#else
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
|
||||
#endif
|
||||
}
|
||||
|
||||
void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend)
|
||||
void EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
|
||||
{
|
||||
if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU)
|
||||
#ifdef _M_X64
|
||||
if (opAddress.IsSimpleReg())
|
||||
{
|
||||
// FIXME: accessSize == 16 does not work. Breaks mkdd
|
||||
UnsafeLoadRegToReg(reg_addr, EAX, accessSize, offset, signExtend);
|
||||
MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
||||
}
|
||||
else if (opAddress.IsImm() && (((u32)opAddress.offset + offset) < 0x80000000)) // MDisp can only be used with s32 offsets
|
||||
{
|
||||
MOVZX(32, accessSize, EAX, MDisp(RBX, (u32)opAddress.offset + offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (offset)
|
||||
ADD(32, R(reg_addr), Imm32((u32)offset));
|
||||
MOV(32, R(EAX), opAddress);
|
||||
MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset));
|
||||
}
|
||||
#else
|
||||
if (opAddress.IsImm())
|
||||
{
|
||||
MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!opAddress.IsSimpleReg(EAX))
|
||||
MOV(32, R(EAX), opAddress);
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base + offset));
|
||||
}
|
||||
#endif
|
||||
|
||||
if (accessSize == 32)
|
||||
{
|
||||
BSWAP(32, EAX);
|
||||
}
|
||||
else if (accessSize == 16)
|
||||
{
|
||||
BSWAP(32, EAX);
|
||||
if (signExtend)
|
||||
SAR(32, R(EAX), Imm8(16));
|
||||
else
|
||||
SHR(32, R(EAX), Imm8(16));
|
||||
}
|
||||
else if (signExtend)
|
||||
{
|
||||
// TODO: bake 8-bit into the original load.
|
||||
MOVSX(32, accessSize, EAX, R(EAX));
|
||||
}
|
||||
}
|
||||
|
||||
void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
|
||||
{
|
||||
if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32) && !Core::g_CoreStartupParameter.bMMU)
|
||||
{
|
||||
// BackPatch only supports 32-bits accesses
|
||||
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
{
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
}
|
||||
|
||||
TEST(32, R(reg_addr), Imm32(mem_mask));
|
||||
FixupBranch fast = J_CC(CC_Z);
|
||||
|
||||
switch (accessSize)
|
||||
|
||||
if (opAddress.IsImm())
|
||||
{
|
||||
case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg_addr); break;
|
||||
case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), reg_addr); break;
|
||||
case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), reg_addr); break;
|
||||
u32 address = (u32)opAddress.offset + offset;
|
||||
if ((address & mem_mask) == 0)
|
||||
{
|
||||
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (accessSize)
|
||||
{
|
||||
case 32: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), address); break;
|
||||
case 16: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), address); break;
|
||||
case 8: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), address); break;
|
||||
}
|
||||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, EAX, R(EAX));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (signExtend && accessSize < 32)
|
||||
else
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, EAX, R(EAX));
|
||||
}
|
||||
if (offset)
|
||||
{
|
||||
MOV(32, R(EAX), opAddress);
|
||||
ADD(32, R(EAX), Imm32(offset));
|
||||
TEST(32, R(EAX), Imm32(mem_mask));
|
||||
FixupBranch fast = J_CC(CC_Z);
|
||||
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(fast);
|
||||
UnsafeLoadRegToReg(reg_addr, EAX, accessSize, 0, signExtend);
|
||||
SetJumpTarget(exit);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), EAX); break;
|
||||
case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), EAX); break;
|
||||
case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), EAX); break;
|
||||
}
|
||||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, EAX, R(EAX));
|
||||
}
|
||||
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(fast);
|
||||
UnsafeLoadToEAX(R(EAX), accessSize, 0, signExtend);
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
else
|
||||
{
|
||||
TEST(32, opAddress, Imm32(mem_mask));
|
||||
FixupBranch fast = J_CC(CC_Z);
|
||||
|
||||
switch (accessSize)
|
||||
{
|
||||
case 32: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), opAddress); break;
|
||||
case 16: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), opAddress); break;
|
||||
case 8: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), opAddress); break;
|
||||
}
|
||||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, EAX, R(EAX));
|
||||
}
|
||||
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(fast);
|
||||
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,11 +215,11 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac
|
||||
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
|
||||
}
|
||||
if (swap) BSWAP(accessSize, reg_value);
|
||||
#ifdef _M_IX86
|
||||
#ifdef _M_X64
|
||||
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
|
||||
#else
|
||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
|
||||
#else
|
||||
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -174,11 +271,11 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));
|
||||
#ifdef _M_IX86
|
||||
#ifdef _M_X64
|
||||
MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
|
||||
#else
|
||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value);
|
||||
#else
|
||||
MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
|
||||
#endif
|
||||
SetJumpTarget(arg2);
|
||||
} else {
|
||||
|
@ -27,7 +27,8 @@ public:
|
||||
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
|
||||
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
|
||||
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
|
||||
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
|
||||
void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
|
||||
void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true);
|
||||
|
||||
// Trashes both inputs and EAX.
|
||||
|
Loading…
x
Reference in New Issue
Block a user