MMU Speed Optimisations:

* Added memory exception checking to JIT instructions (sketched below)
* Cleaned up Load/Store code (thanks dok.slade)
* Consolidated memory access routines
* Fixed graphics corruption in some games (dcbz instruction)
* F-Zero GX now works in 32-bit JIT mode
* Removed temporary slow hack in JITIL (introduced in r4839)
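
For reference, the per-instruction exception checking behind the first bullet is built on a MEMCHECK_START/MEMCHECK_END guard pair around the register write-back of each load/store. A minimal sketch of what such macros plausibly expand to (the exact bodies are an assumption, not verbatim Dolphin source; all names used appear in the diff below):

// Skip the guarded write-back if the slow memory path recorded a DSI
// (data storage interrupt), so the instruction can be restarted cleanly
// once the exception has been serviced.
#define MEMCHECK_START \
    FixupBranch memException; \
    if (js.memcheck) { \
        TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); \
        memException = J_CC(CC_NZ); \
    }

#define MEMCHECK_END \
    if (js.memcheck) \
        SetJumpTarget(memException);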


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6032 8ced0084-cf51-0410-be5f-012b33b47a6e
Author: skidau
Date: 2010-08-02 04:22:04 +00:00
Parent: 26aee8ff76
Commit: 9c36f0bc88
13 changed files with 289 additions and 182 deletions

View File

@@ -184,14 +184,6 @@ bool Init()
g_CoreStartupParameter = _CoreParameter;
// TODO: Reenable JIT instructions
if (g_CoreStartupParameter.bMMU)
{
g_CoreStartupParameter.bJITLoadStoreOff = true;
g_CoreStartupParameter.bJITLoadStorePairedOff = true;
g_CoreStartupParameter.bJITLoadStoreFloatingOff = true;
}
// FIXME DEBUG_LOG(BOOT, dump_params());
Host_SetWaitCursor(true);

View File

@@ -147,7 +147,7 @@ u32 EFB_Read(const u32 addr)
}
template <typename T>
inline void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
inline void ReadFromHardware(T &_var, const u32 em_address, const u32 effective_address, Memory::XCheckTLBFlag flag)
{
// TODO: Figure out the fastest order of tests for both read and write (they are probably different).
if ((em_address & 0xC8000000) == 0xC8000000)

View File

@@ -394,8 +394,7 @@ void dcbtst(UGeckoInstruction _inst)
void dcbz(UGeckoInstruction _inst)
{
// HACK but works... we think
if (!Core::g_CoreStartupParameter.bMMU)
Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32); // Breaks Rogue Leader, fixes Super Mario Sunshine
Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32);
}
// eciwx/ecowx technically should access the specified device
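// Context for the dcbz change above (a sketch of the idea, not verbatim
// source): dcbz zeroes the 32-byte cache line containing the effective
// address,
//   u32 ea = Helper_Get_EA_X(_inst);    // (rA|0) + rB
//   Memory::Memset(ea & ~31u, 0, 32);
// The Memset writes straight to emulated RAM without MMU translation, so
// with bMMU enabled it can clear the wrong page; guarding it out is what
// fixes the graphics corruption noted in the changelog.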

View File

@@ -194,7 +194,7 @@ void Jit64::Init()
#else
jo.enableFastMem = false;
#endif
jo.assumeFPLoadFromMem = true;
jo.assumeFPLoadFromMem = Core::g_CoreStartupParameter.bUseFastMem;
jo.fpAccurateFcmp = true; // Fallback to Interpreter
jo.optimizeGatherPipe = true;
jo.fastInterrupts = false;
@@ -575,6 +575,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
{
// In case we are about to jump to the dispatcher, flush regs
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI));
FixupBranch noMemException = J_CC(CC_Z);
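// Our reading of the block above (the fall-through code is not shown in
// this hunk): if a guarded load/store left EXCEPTION_DSI pending, the
// branch is not taken and the JIT presumably writes back the PC and exits
// to the dispatcher; noMemException skips that exit on the common path.
// Both register caches are flushed first because once the block may be
// left early, every guest register has to be back in ppcState.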

View File

@@ -38,51 +38,27 @@ void Jit64::lbzx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
{ Default(inst); return; }
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.FlushLockX(ABI_PARAM1);
if (b == d || a == d)
gpr.LoadToX64(d, true, true);
else
gpr.LoadToX64(d, false, true);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
{
ADD(32, R(ABI_PARAM1), gpr.R(a));
#if 0
}
SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
MOV(32, gpr.R(d), R(EAX));
#else
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false);
#endif
gpr.UnlockAll();
gpr.UnlockAllX();
}
void Jit64::lwzx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
MEMCHECK_START
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.FlushLockX(ABI_PARAM1);
if (b == d || a == d)
gpr.LoadToX64(d, true, true);
else
gpr.LoadToX64(d, false, true);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
ADD(32, R(ABI_PARAM1), gpr.R(a));
#if 1
SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
gpr.KillImmediate(d);
MOV(32, gpr.R(d), R(EAX));
#else
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false);
#endif
gpr.UnlockAll();
MEMCHECK_END
gpr.UnlockAllX();
}
@@ -92,21 +68,48 @@ void Jit64::lhax(UGeckoInstruction inst)
JITDISABLE(LoadStore)
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.FlushLockX(ABI_PARAM1);
if (b == d || a == d)
gpr.LoadToX64(d, true, true);
else
gpr.LoadToX64(d, false, true);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
{
ADD(32, R(ABI_PARAM1), gpr.R(a));
}
// Some homebrew actually loads from a hw reg with this instruction
SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
MEMCHECK_START
gpr.KillImmediate(d);
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
MEMCHECK_END
gpr.UnlockAllX();
}
void Jit64::lwzx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
{
ADD(32, R(ABI_PARAM1), gpr.R(a));
}
SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
MEMCHECK_START
gpr.KillImmediate(d);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAllX();
}
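// Pattern used by the rewritten loads above: SafeLoadRegToEAX may take the
// slow path and record a DSI, so the write-back to rD sits inside the
// MEMCHECK guard. gpr.KillImmediate(d) evicts any immediate binding so the
// MOV has a writable destination; if the load faulted, the guard skips the
// MOV and rD keeps its old value, as a restarted instruction expects.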
@@ -114,6 +117,7 @@ void Jit64::lXz(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
{ Default(inst); return; }
@@ -135,7 +139,7 @@ void Jit64::lXz(UGeckoInstruction inst)
{
// TODO(LinesPrower):
// - Rewrite this!
// It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right
// It seems to be ugly and inefficient, but I don't know JIT stuff enough to make it right
// It only demonstrates the idea
// do our job at first
@@ -178,6 +182,7 @@ void Jit64::lXz(UGeckoInstruction inst)
Default(inst);
return;
}
int accessSize;
switch (inst.OPCD)
{
@@ -193,40 +198,32 @@ void Jit64::lXz(UGeckoInstruction inst)
return;
}
//Still here? Do regular path.
#if defined(_M_X64)
if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
#else
if (true) {
#endif
// Safe and boring
if (accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU)
{
// Fast and daring
gpr.Lock(a, d);
gpr.LoadToX64(a, true, false);
gpr.LoadToX64(d, a == d, true);
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
BSWAP(32, gpr.R(d).GetSimpleReg());
gpr.UnlockAll();
gpr.Flush(FLUSH_ALL);
}
else
{
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
gpr.LoadToX64(d, false, true);
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
gpr.UnlockAllX();
return;
}
MEMCHECK_START
// Fast and daring
gpr.Lock(a, d);
gpr.LoadToX64(a, true, false);
gpr.LoadToX64(d, a == d, true);
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
switch (accessSize) {
case 32:
BSWAP(32, gpr.R(d).GetSimpleReg());
break;
// Careful in the backpatch - need to properly nop over first
// case 16:
// BSWAP(32, gpr.R(d).GetSimpleReg());
// SHR(32, gpr.R(d), Imm8(16));
// break;
gpr.KillImmediate(d);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAllX();
}
gpr.UnlockAll();
}
void Jit64::lha(UGeckoInstruction inst)
@@ -239,14 +236,17 @@ void Jit64::lha(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16;
// Safe and boring
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
gpr.LoadToX64(d, d == a, true);
MEMCHECK_START
gpr.KillImmediate(d);
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
MEMCHECK_END
gpr.UnlockAllX();
return;
}
void Jit64::lwzux(UGeckoInstruction inst)
@@ -260,17 +260,20 @@ void Jit64::lwzux(UGeckoInstruction inst)
Default(inst);
return;
}
gpr.Lock(a, b, d);
gpr.LoadToX64(d, b == d, true);
gpr.Lock(a);
gpr.LoadToX64(a, true, true);
ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(EAX), gpr.R(a));
SafeLoadRegToEAX(EAX, 32, 0, false);
MEMCHECK_START
gpr.KillImmediate(d);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAll();
return;
}
// Zero cache line.
@@ -348,7 +351,7 @@ void Jit64::stX(UGeckoInstruction inst)
gpr.SetImmediate32(a, addr);
MOV(accessSize, R(EAX), gpr.R(s));
BSWAP(accessSize, EAX);
WriteToConstRamAddress(accessSize, R(EAX), addr);
WriteToConstRamAddress(accessSize, R(EAX), addr);
return;
}
// Other IO not worth the trouble.
@@ -387,35 +390,23 @@ void Jit64::stX(UGeckoInstruction inst)
#endif*/
//Still here? Do regular path.
gpr.Lock(s, a);
gpr.FlushLockX(ECX, EDX);
MOV(32, R(EDX), gpr.R(a));
MOV(32, R(ECX), gpr.R(s));
if (offset)
ADD(32, R(EDX), Imm32((u32)offset));
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
MOV(32, R(ABI_PARAM2), gpr.R(a));
MOV(32, R(ABI_PARAM1), gpr.R(s));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, accessSize, offset);
if (update && offset)
{
gpr.LoadToX64(a, true, true);
MOV(32, gpr.R(a), R(EDX));
MEMCHECK_START
gpr.KillImmediate(a);
MOV(32, gpr.R(a), R(ABI_PARAM2));
MEMCHECK_END
}
TEST(32, R(EDX), Imm32(0x0C000000));
FixupBranch unsafe_addr = J_CC(CC_NZ);
BSWAP(accessSize, ECX);
#ifdef _M_X64
MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
#else
AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
#endif
FixupBranch skip_call = J();
SetJumpTarget(unsafe_addr);
switch (accessSize)
{
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
}
SetJumpTarget(skip_call);
gpr.UnlockAll();
gpr.UnlockAllX();
}
@@ -459,9 +450,14 @@ void Jit64::stXx(UGeckoInstruction inst)
MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
//MEMCHECK_START
// TODO: Insert rA update code here
//MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX();
return;
}
// A few games use these heavily in video codecs.

View File

@@ -63,7 +63,6 @@ void Jit64::lfs(UGeckoInstruction inst)
}
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
if (jo.assumeFPLoadFromMem)
{
@@ -74,12 +73,16 @@ void Jit64::lfs(UGeckoInstruction inst)
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
}
MEMCHECK_START
MOV(32, M(&temp32), R(EAX));
fpr.Lock(d);
fpr.LoadToX64(d, false);
CVTSS2SD(fpr.RX(d), M(&temp32));
MOVDDUP(fpr.RX(d), fpr.R(d));
gpr.UnlockAll();
MEMCHECK_END
gpr.UnlockAllX();
fpr.UnlockAll();
}
@@ -90,6 +93,8 @@ void Jit64::lfd(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
int d = inst.RD;
int a = inst.RA;
if (!a)
@@ -119,18 +124,28 @@ void Jit64::lfd(UGeckoInstruction inst)
MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
BSWAP(64, EAX);
MOV(64, M(&temp64), R(EAX));
MEMCHECK_START
MOVSD(XMM0, M(&temp64));
MOVSD(xd, R(XMM0));
MEMCHECK_END
#else
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
BSWAP(32, EAX);
MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));
MEMCHECK_START
MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
BSWAP(32, EAX);
MOV(32, M(&temp64), R(EAX));
MOVSD(XMM0, M(&temp64));
MOVSD(xd, R(XMM0));
MEMCHECK_END
#if 0
// Alternate implementation; possibly faster
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
@@ -156,6 +171,8 @@ void Jit64::stfd(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
int s = inst.RS;
int a = inst.RA;
if (!a)
@@ -207,18 +224,28 @@ void Jit64::stfd(UGeckoInstruction inst)
#ifdef _M_X64
fpr.LoadToX64(s, true, false);
MOVSD(M(&temp64), fpr.RX(s));
MEMCHECK_START
MOV(64, R(EAX), M(&temp64));
BSWAP(64, EAX);
MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, 0), R(EAX));
MEMCHECK_END
#else
fpr.LoadToX64(s, true, false);
MOVSD(M(&temp64), fpr.RX(s));
MEMCHECK_START
MOV(32, R(EAX), M(&temp64));
BSWAP(32, EAX);
MOV(32, MDisp(ABI_PARAM1, (u32)Memory::base + 4), R(EAX));
MOV(32, R(EAX), M((void*)((u8 *)&temp64 + 4)));
BSWAP(32, EAX);
MOV(32, MDisp(ABI_PARAM1, (u32)Memory::base), R(EAX));
MEMCHECK_END
#endif
}
SetJumpTarget(quit);
@@ -233,6 +260,8 @@ void Jit64::stfs(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
bool update = inst.OPCD & 1;
int s = inst.RS;
int a = inst.RA;
@@ -287,6 +316,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
gpr.FlushLockX(ABI_PARAM1);
fpr.Lock(inst.RS);
@@ -295,7 +326,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
CVTSD2SS(XMM0, fpr.R(inst.RS));
MOVD_xmm(R(EAX), XMM0);
UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
gpr.UnlockAllX();
fpr.UnlockAll();
}
@@ -306,12 +338,14 @@ void Jit64::lfsx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
fpr.Lock(inst.RS);
fpr.LoadToX64(inst.RS, false, true);
MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA)
{
ADD(32, R(EAX), gpr.R(inst.RA));
if (cpu_info.bSSSE3) {
}
if (cpu_info.bSSSE3 && !js.memcheck) {
fpr.Lock(inst.RS);
fpr.LoadToX64(inst.RS, false, true);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
#ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
@@ -319,14 +353,25 @@ void Jit64::lfsx(UGeckoInstruction inst)
#else
MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
#endif
MEMCHECK_START
PSHUFB(r, M((void *)bswapShuffle1x4));
CVTSS2SD(r, R(r));
MOVDDUP(r, R(r));
MEMCHECK_END
} else {
UnsafeLoadRegToReg(EAX, EAX, 32, false);
SafeLoadRegToEAX(EAX, 32, false);
MEMCHECK_START
MOV(32, M(&temp32), R(EAX));
CVTSS2SD(XMM0, M(&temp32));
fpr.Lock(inst.RS);
fpr.LoadToX64(inst.RS, false, true);
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
MEMCHECK_END
}
fpr.UnlockAll();
}

View File

@@ -54,6 +54,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStorePaired)
if (js.memcheck) { Default(inst); return; }
if (!inst.RA)
{
// TODO: Support these cases if it becomes necessary.
@@ -136,6 +138,8 @@ void Jit64::psq_l(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStorePaired)
if (js.memcheck) { Default(inst); return; }
if (!inst.RA)
{
Default(inst);
@@ -174,7 +178,13 @@ void Jit64::psq_l(UGeckoInstruction inst)
ABI_AlignStack(0);
CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized));
ABI_RestoreStack(0);
// MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access
CVTPS2PD(fpr.RX(inst.RS), R(XMM0));
// MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX();
}
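// Why the FIXME above: pairedLoadQuantized reads through the fastmem
// pointer rather than Memory::Read_*, so EXCEPTION_DSI is never raised and
// a MEMCHECK guard here would test a flag that cannot be set. That is
// presumably also why psq_l/psq_st now bail to Default(inst) whenever
// js.memcheck is active.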

View File

@@ -465,7 +465,6 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
}
static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
bool win32 = false;
if (RI.UseProfile) {
unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++];
if (!(curLoad & 0x0C000000)) {
@@ -486,15 +485,8 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
if (RI.MakeProfile) {
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
}
#ifdef _M_IX86
win32 = true;
#endif
FixupBranch argh;
if (!(win32 && SConfig::GetInstance().m_LocalCoreStartupParameter.iTLBHack == 1))
{
RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000));
argh = RI.Jit->J_CC(CC_Z);
}
RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000));
FixupBranch argh = RI.Jit->J_CC(CC_Z);
// Slow safe read using Memory::Read_Ux routines
#ifdef _M_IX86 // we don't allocate EAX on x64 so no reason to save it.
@@ -514,14 +506,10 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
RI.Jit->POP(32, R(EAX));
#endif
}
if (!(win32 && SConfig::GetInstance().m_LocalCoreStartupParameter.iTLBHack == 1))
{
FixupBranch arg2 = RI.Jit->J();
// Fast unsafe read using memory pointer EBX
RI.Jit->SetJumpTarget(argh);
RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false);
RI.Jit->SetJumpTarget(arg2);
}
FixupBranch arg2 = RI.Jit->J();
RI.Jit->SetJumpTarget(argh);
RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false);
RI.Jit->SetJumpTarget(arg2);
if (regReadUse(RI, I))
RI.regs[reg] = I;
}
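// Same fast/slow split as the Jit64 backend (our reading of the mask, not
// authoritative): (addr & 0x0C000000) == 0 identifies directly-mapped RAM,
// which is read inline through the EBX memory base pointer; anything else
// goes through the safe Memory::Read_Ux thunk, which handles hardware
// registers and can record exceptions.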

View File

@@ -184,7 +184,7 @@ void JitIL::Init()
#else
jo.enableFastMem = false;
#endif
jo.assumeFPLoadFromMem = true;
jo.assumeFPLoadFromMem = Core::g_CoreStartupParameter.bUseFastMem;
jo.fpAccurateFcmp = false;
jo.optimizeGatherPipe = true;
jo.fastInterrupts = false;

View File

@@ -39,6 +39,7 @@ void JitIL::lhax(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -50,7 +51,8 @@ void JitIL::lhax(UGeckoInstruction inst)
void JitIL::lXz(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
JITDISABLE(LoadStore)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -81,6 +83,7 @@ void JitIL::lha(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr =
ibuild.EmitIntConst((s32)(s16)inst.SIMM_16);
if (inst.RA)
@@ -94,6 +97,7 @@ void JitIL::lXzx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA) {
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -141,6 +145,7 @@ void JitIL::stX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
value = ibuild.EmitLoadGReg(inst.RS);
if (inst.RA)
@@ -160,6 +165,7 @@ void JitIL::stXx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB),
value = ibuild.EmitLoadGReg(inst.RS);
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -179,6 +185,7 @@ void JitIL::lmw(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -194,6 +201,7 @@ void JitIL::stmw(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));

View File

@@ -43,6 +43,7 @@ void JitIL::lfs(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), val;
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -56,6 +57,7 @@ void JitIL::lfd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), val;
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -70,6 +72,7 @@ void JitIL::stfd(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
val = ibuild.EmitLoadFReg(inst.RS);
if (inst.RA)
@@ -85,6 +88,7 @@ void JitIL::stfs(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
val = ibuild.EmitLoadFReg(inst.RS);
if (inst.RA)
@@ -101,6 +105,7 @@ void JitIL::stfsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB),
val = ibuild.EmitLoadFReg(inst.RS);
if (inst.RA)
@@ -115,6 +120,7 @@ void JitIL::lfsx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB), val;
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));

View File

@@ -37,6 +37,7 @@ void JitIL::psq_st(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStorePaired)
if (js.memcheck) { Default(inst); return; }
if (inst.W) {Default(inst); return;}
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
if (inst.RA)
@@ -52,6 +53,7 @@ void JitIL::psq_l(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStorePaired)
if (js.memcheck) { Default(inst); return; }
if (inst.W) {Default(inst); return;}
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
if (inst.RA)

View File

@@ -31,15 +31,9 @@
using namespace Gen;
void EmuCodeBlock::JitClearCA()
{
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
}
void EmuCodeBlock::JitSetCA()
{
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
}
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
static u32 GC_ALIGNED16(float_buffer);
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{
@@ -60,7 +54,9 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc
SAR(32, R(reg_value), Imm8(16));
else
SHR(32, R(reg_value), Imm8(16));
} else if (signExtend) {
}
else if (signExtend)
{
// TODO: bake 8-bit into the original load.
MOVSX(32, accessSize, reg_value, R(reg_value));
}
@@ -76,26 +72,53 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
#endif
}
void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend)
{
if (offset)
ADD(32, R(reg), Imm32((u32)offset));
TEST(32, R(reg), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_Z);
switch (accessSize)
if (Core::g_CoreStartupParameter.bUseFastMem && accessSize == 32 && !Core::g_CoreStartupParameter.bMMU)
{
case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), reg); break;
case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), reg); break;
UnsafeLoadRegToReg(reg_addr, EAX, accessSize, offset, signExtend);
}
if (signExtend && accessSize < 32) {
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, EAX, R(EAX));
else
{
if (offset)
ADD(32, R(reg_addr), Imm32((u32)offset));
FixupBranch addrf0;
FixupBranch addr20;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
CMP(32, R(reg_addr), Imm32(0xf0000000));
addrf0 = J_CC(CC_GE);
TEST(32, R(reg_addr), Imm32(0x20000000));
addr20 = J_CC(CC_NZ);
}
TEST(32, R(reg_addr), Imm32(0x0C000000));
FixupBranch fast = J_CC(CC_Z);
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
SetJumpTarget(addr20);
SetJumpTarget(addrf0);
}
switch (accessSize)
{
case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg_addr); break;
case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), reg_addr); break;
case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), reg_addr); break;
}
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, EAX, R(EAX));
}
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeLoadRegToReg(reg_addr, EAX, accessSize, 0, signExtend);
SetJumpTarget(exit);
}
FixupBranch arg2 = J();
SetJumpTarget(argh);
UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend);
SetJumpTarget(arg2);
}
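// Summary of the dispatch above (our reading of the masks, not
// authoritative): with bMMU or iTLBHack, addresses at or above 0xf0000000
// or with bit 0x20000000 set may be translated or otherwise special, so
// they are routed to the Memory::Read_* thunks, which can raise the DSI
// that a caller's MEMCHECK guard then observes; plain RAM passes the
// 0x0C000000 test and is read inline. A typical call site, mirroring the
// Jit64 loads earlier in this commit:
//   MOV(32, R(ABI_PARAM1), gpr.R(b));
//   if (a)
//       ADD(32, R(ABI_PARAM1), gpr.R(a));
//   SafeLoadRegToEAX(ABI_PARAM1, 32, 0);  // result arrives in EAX
//   MEMCHECK_START
//   gpr.KillImmediate(d);
//   MOV(32, gpr.R(d), R(EAX));
//   MEMCHECK_END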
void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
@@ -116,23 +139,47 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{
if (offset)
ADD(32, R(reg_addr), Imm32(offset));
TEST(32, R(reg_addr), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_Z);
ADD(32, R(reg_addr), Imm32((u32)offset));
// TODO: Figure out a cleaner way to check memory bounds
FixupBranch addrf0;
FixupBranch addr20;
FixupBranch fast;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
CMP(32, R(reg_addr), Imm32(0xf0000000));
addrf0 = J_CC(CC_GE);
TEST(32, R(reg_addr), Imm32(0x20000000));
addr20 = J_CC(CC_NZ);
}
if (!Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bUseFastMem)
{
TEST(32, R(reg_addr), Imm32(0x0C000000));
fast = J_CC(CC_Z);
}
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
SetJumpTarget(addr20);
SetJumpTarget(addrf0);
}
switch (accessSize)
{
case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr); break;
case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr); break;
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break;
}
FixupBranch arg2 = J();
SetJumpTarget(argh);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
SetJumpTarget(arg2);
}
FixupBranch exit = J();
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
static u32 GC_ALIGNED16(float_buffer);
if (!Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bUseFastMem)
{
SetJumpTarget(fast);
}
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
SetJumpTarget(exit);
}
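// The write path mirrors the load path: the same 0xf0000000/0x20000000
// screens divert possibly-translated addresses to the Memory::Write_*
// thunks (which can set EXCEPTION_DSI), while plain RAM falls through to
// the inline UnsafeWriteRegToReg store. Note the fast-path TEST and its
// SetJumpTarget(fast) are emitted under the same condition, so every
// emitted branch has a matching target.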
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
{
@@ -147,12 +194,12 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
FixupBranch arg2 = J();
SetJumpTarget(argh);
PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));
#ifdef _M_IX86
#ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value);
#else
#else
MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
#endif
#endif
SetJumpTarget(arg2);
} else {
MOVSS(M(&float_buffer), xmm_value);
@@ -197,3 +244,13 @@ void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
CVTPS2PD(xmm, R(xmm));
}
}
void EmuCodeBlock::JitClearCA()
{
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
}
void EmuCodeBlock::JitSetCA()
{
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
}
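// XER.CA is a single bit in the 32-bit XER image (XER_CA_MASK is
// presumably 1 << 29), so these helpers just clear or set that one bit
// directly in ppcState rather than loading XER into a register first.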