MMU Speed Optimisations:

* Added memory exception checking to JIT instructions
* Cleaned up Load/Store code (thanks dok.slade)
* Consolidated memory access routines
* Fixed graphics corruption in some games (dcbz instruction)
* F-Zero GX now works in 32-bit JIT mode
* Removed the temporary slow hack in JITIL (introduced in r4839)
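
The heart of the change is in the consolidated SafeLoadRegToEAX/SafeWriteRegToReg routines (last file below): instead of turning JIT load/store off under MMU, every access now branches between a fast direct-pointer read and a safe Memory::Read_*/Write_* call that can raise a DSI exception. A rough standalone C++ model of that address dispatch — illustrative only; the buffer, helper names and stubbed slow path here are made up, not Dolphin's API:

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint8_t fake_ram[32];              // stands in for the fastmem mapping

static uint32_t bswap32(uint32_t v)       // PowerPC is big-endian, x86 is not
{
	return (v << 24) | ((v & 0xFF00u) << 8) | ((v >> 8) & 0xFF00u) | (v >> 24);
}

static uint32_t SlowRead32(uint32_t addr) // stands in for Memory::Read_U32
{
	std::printf("slow (exception-checked) read at %08x\n", addr);
	return 0;
}

// Mirrors the branch order SafeLoadRegToEAX emits after this commit:
// MMU/TLB-hack builds first route the 0xF0000000-and-above and 0x20000000
// ranges to the safe handler, then the 0x0C000000 test catches MMIO;
// everything else takes the direct pointer read.
static uint32_t SafeRead32(uint32_t addr, bool mmu_or_tlb_hack)
{
	if (mmu_or_tlb_hack && (addr >= 0xF0000000u || (addr & 0x20000000u)))
		return SlowRead32(addr);          // CMP 0xf0000000 / TEST 0x20000000
	if (addr & 0x0C000000u)
		return SlowRead32(addr);          // TEST 0x0C000000
	uint32_t value;                       // fast path: plain load + BSWAP
	std::memcpy(&value, &fake_ram[addr & 15u], sizeof(value));
	return bswap32(value);
}

int main()
{
	SafeRead32(0x80000000u, true);        // cached RAM: fast path
	SafeRead32(0xCC008000u, true);        // MMIO range: slow path
	SafeRead32(0xF0001234u, true);        // high range under MMU: slow path
}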


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6032 8ced0084-cf51-0410-be5f-012b33b47a6e
skidau 2010-08-02 04:22:04 +00:00
parent 26aee8ff76
commit 9c36f0bc88
13 changed files with 289 additions and 182 deletions

View File

@@ -184,14 +184,6 @@ bool Init()
 	g_CoreStartupParameter = _CoreParameter;
-	// TODO: Reenable JIT instructions
-	if (g_CoreStartupParameter.bMMU)
-	{
-		g_CoreStartupParameter.bJITLoadStoreOff = true;
-		g_CoreStartupParameter.bJITLoadStorePairedOff = true;
-		g_CoreStartupParameter.bJITLoadStoreFloatingOff = true;
-	}
-
 	// FIXME DEBUG_LOG(BOOT, dump_params());
 	Host_SetWaitCursor(true);

View File

@@ -147,7 +147,7 @@ u32 EFB_Read(const u32 addr)
 }

 template <typename T>
-inline void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
+inline void ReadFromHardware(T &_var, const u32 em_address, const u32 effective_address, Memory::XCheckTLBFlag flag)
 {
 	// TODO: Figure out the fastest order of tests for both read and write (they are probably different).
 	if ((em_address & 0xC8000000) == 0xC8000000)

View File

@@ -394,8 +394,7 @@ void dcbtst(UGeckoInstruction _inst)
 void dcbz(UGeckoInstruction _inst)
 {
 	// HACK but works... we think
-	if (!Core::g_CoreStartupParameter.bMMU)
-		Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32); // Breaks Rogue Leader, fixes Super Mario Sunshine
+	Memory::Memset(Helper_Get_EA_X(_inst) & (~31), 0, 32);
 }
 // eciwx/ecowx technically should access the specified device
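
(dcbz zeroes the whole 32-byte cache line containing the effective address; the & ~31 is the line alignment. A quick standalone check of that arithmetic — illustrative values, not Dolphin code:)

#include <cassert>
#include <cstdint>

int main()
{
	uint32_t ea = 0x8000104A;   // an arbitrary effective address
	uint32_t line = ea & ~31u;  // dcbz aligns down to the 32-byte line...
	assert(line == 0x80001040); // ...then 32 bytes from here are zeroed
}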

View File

@@ -194,7 +194,7 @@ void Jit64::Init()
 #else
 	jo.enableFastMem = false;
 #endif
-	jo.assumeFPLoadFromMem = true;
+	jo.assumeFPLoadFromMem = Core::g_CoreStartupParameter.bUseFastMem;
 	jo.fpAccurateFcmp = true; // Fallback to Interpreter
 	jo.optimizeGatherPipe = true;
 	jo.fastInterrupts = false;
@@ -575,6 +575,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 		if (js.memcheck && (opinfo->flags & FL_LOADSTORE))
 		{
+			// In case we are about to jump to the dispatcher, flush regs
+			gpr.Flush(FLUSH_ALL);
+			fpr.Flush(FLUSH_ALL);
+
 			TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI));
 			FixupBranch noMemException = J_CC(CC_Z);
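
The MEMCHECK_START/MEMCHECK_END pairs that recur throughout the load/store diffs below bracket the register write-back the same way this block does: the guarded code only runs when no DSI (data storage interrupt) is pending, so a faulting load never commits a stale value. In plain C++ the emitted logic amounts to roughly this sketch — the flag value and the function are made up for illustration:

#include <cstdint>

// Models PowerPC::ppcState.Exceptions; the DSI bit value is an assumption.
static uint32_t exceptions = 0;
static constexpr uint32_t EXCEPTION_DSI = 1u << 2;

// What MEMCHECK_START ... MEMCHECK_END around "rd = eax" boils down to:
// test Exceptions against EXCEPTION_DSI, skip the write-back if it is set.
static void GuardedWriteBack(uint32_t& rd, uint32_t eax)
{
	if ((exceptions & EXCEPTION_DSI) == 0)
		rd = eax;                   // only commit when the load did not fault
}

int main()
{
	uint32_t rd = 0xDEADBEEF;
	GuardedWriteBack(rd, 42);       // commits: no exception pending
	exceptions |= EXCEPTION_DSI;
	GuardedWriteBack(rd, 7);        // skipped: DSI pending, rd stays 42
	return rd == 42 ? 0 : 1;
}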

View File

@@ -38,51 +38,27 @@ void Jit64::lbzx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
 	if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
 	{ Default(inst); return; }

 	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.Lock(a, b, d);
 	gpr.FlushLockX(ABI_PARAM1);
-	if (b == d || a == d)
-		gpr.LoadToX64(d, true, true);
-	else
-		gpr.LoadToX64(d, false, true);
 	MOV(32, R(ABI_PARAM1), gpr.R(b));
 	if (a)
+	{
 		ADD(32, R(ABI_PARAM1), gpr.R(a));
-#if 0
+	}
 	SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
-	MOV(32, gpr.R(d), R(EAX));
-#else
-	UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false);
-#endif
-	gpr.UnlockAll();
-	gpr.UnlockAllX();
-}
-
-void Jit64::lwzx(UGeckoInstruction inst)
-{
-	INSTRUCTION_START
-	JITDISABLE(LoadStore)
-	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.Lock(a, b, d);
-	gpr.FlushLockX(ABI_PARAM1);
-	if (b == d || a == d)
-		gpr.LoadToX64(d, true, true);
-	else
-		gpr.LoadToX64(d, false, true);
-	MOV(32, R(ABI_PARAM1), gpr.R(b));
-	if (a)
-		ADD(32, R(ABI_PARAM1), gpr.R(a));
-#if 1
-	SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
-	MOV(32, gpr.R(d), R(EAX));
-#else
-	UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false);
-#endif
-	gpr.UnlockAll();
+
+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
+	MOV(32, gpr.R(d), R(EAX));
+
+	MEMCHECK_END
+
 	gpr.UnlockAllX();
 }
@@ -92,21 +68,48 @@ void Jit64::lhax(UGeckoInstruction inst)
 	JITDISABLE(LoadStore)

 	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.Lock(a, b, d);
 	gpr.FlushLockX(ABI_PARAM1);
-	if (b == d || a == d)
-		gpr.LoadToX64(d, true, true);
-	else
-		gpr.LoadToX64(d, false, true);
 	MOV(32, R(ABI_PARAM1), gpr.R(b));
 	if (a)
+	{
 		ADD(32, R(ABI_PARAM1), gpr.R(a));
+	}

 	// Some homebrew actually loads from a hw reg with this instruction
 	SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);

+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
 	MOV(32, gpr.R(d), R(EAX));
-	gpr.UnlockAll();
+
+	MEMCHECK_END
+
 	gpr.UnlockAllX();
 }
+
+void Jit64::lwzx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(LoadStore)
+
+	int a = inst.RA, b = inst.RB, d = inst.RD;
+	gpr.FlushLockX(ABI_PARAM1);
+	MOV(32, R(ABI_PARAM1), gpr.R(b));
+	if (a)
+	{
+		ADD(32, R(ABI_PARAM1), gpr.R(a));
+	}
+	SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
+
+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
+	MOV(32, gpr.R(d), R(EAX));
+
+	MEMCHECK_END
+
+	gpr.UnlockAllX();
+}
@@ -114,6 +117,7 @@ void Jit64::lXz(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+
 	if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
 	{ Default(inst); return; }
@@ -135,7 +139,7 @@ void Jit64::lXz(UGeckoInstruction inst)
 	{
 		// TODO(LinesPrower):
 		// - Rewrite this!
-		// It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right
+		// It seems to be ugly and inefficient, but I don't know JIT stuff enough to make it right
 		// It only demonstrates the idea

 		// do our job at first
@@ -178,6 +182,7 @@ void Jit64::lXz(UGeckoInstruction inst)
 		Default(inst);
 		return;
 	}
+
 	int accessSize;
 	switch (inst.OPCD)
 	{
@@ -193,40 +198,32 @@ void Jit64::lXz(UGeckoInstruction inst)
 		return;
 	}

-	//Still here? Do regular path.
-#if defined(_M_X64)
-	if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
-#else
-	if (true) {
-#endif
-		// Safe and boring
-		gpr.FlushLockX(ABI_PARAM1);
-		MOV(32, R(ABI_PARAM1), gpr.R(a));
-		SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
-		gpr.LoadToX64(d, false, true);
-		MOV(32, gpr.R(d), R(EAX));
-		gpr.UnlockAll();
-		gpr.UnlockAllX();
-		return;
-	}
-
-	// Fast and daring
-	gpr.Lock(a, d);
-	gpr.LoadToX64(a, true, false);
-	gpr.LoadToX64(d, a == d, true);
-	MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
-	switch (accessSize) {
-	case 32:
-		BSWAP(32, gpr.R(d).GetSimpleReg());
-		break;
-		// Careful in the backpatch - need to properly nop over first
-	//	case 16:
-	//		BSWAP(32, gpr.R(d).GetSimpleReg());
-	//		SHR(32, gpr.R(d), Imm8(16));
-	//		break;
-	}
-	gpr.UnlockAll();
+	if (accessSize == 32 && jo.enableFastMem && !Core::g_CoreStartupParameter.bMMU)
+	{
+		// Fast and daring
+		gpr.Lock(a, d);
+		gpr.LoadToX64(a, true, false);
+		gpr.LoadToX64(d, a == d, true);
+		MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
+		BSWAP(32, gpr.R(d).GetSimpleReg());
+		gpr.UnlockAll();
+		gpr.Flush(FLUSH_ALL);
+	}
+	else
+	{
+		gpr.FlushLockX(ABI_PARAM1);
+		gpr.Lock(d, a);
+		MOV(32, R(ABI_PARAM1), gpr.R(a));
+		SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
+
+		MEMCHECK_START
+
+		gpr.KillImmediate(d);
+		MOV(32, gpr.R(d), R(EAX));
+
+		MEMCHECK_END
+
+		gpr.UnlockAllX();
+	}
 }
@@ -239,14 +236,17 @@ void Jit64::lha(UGeckoInstruction inst)
 	s32 offset = (s32)(s16)inst.SIMM_16;
 	// Safe and boring
 	gpr.FlushLockX(ABI_PARAM1);
+	gpr.Lock(d, a);
 	MOV(32, R(ABI_PARAM1), gpr.R(a));
 	SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
-	gpr.LoadToX64(d, d == a, true);
+
+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
 	MOV(32, gpr.R(d), R(EAX));
-	gpr.UnlockAll();
+
+	MEMCHECK_END
+
 	gpr.UnlockAllX();
-	return;
 }
@@ -260,17 +260,20 @@ void Jit64::lwzux(UGeckoInstruction inst)
 		Default(inst);
 		return;
 	}
-	gpr.Lock(a, b, d);
-	gpr.LoadToX64(d, b == d, true);
+	gpr.Lock(a);
 	gpr.LoadToX64(a, true, true);
 	ADD(32, gpr.R(a), gpr.R(b));
 	MOV(32, R(EAX), gpr.R(a));
 	SafeLoadRegToEAX(EAX, 32, 0, false);
+
+	MEMCHECK_START
+
+	gpr.KillImmediate(d);
 	MOV(32, gpr.R(d), R(EAX));
+
+	MEMCHECK_END
+
 	gpr.UnlockAll();
-	return;
 }

 // Zero cache line.
@@ -348,7 +351,7 @@ void Jit64::stX(UGeckoInstruction inst)
 			gpr.SetImmediate32(a, addr);
 			MOV(accessSize, R(EAX), gpr.R(s));
 			BSWAP(accessSize, EAX);
 			WriteToConstRamAddress(accessSize, R(EAX), addr);
 			return;
 		}
 		// Other IO not worth the trouble.
@@ -387,35 +390,23 @@ void Jit64::stX(UGeckoInstruction inst)
 #endif*/

 	//Still here? Do regular path.
-	gpr.Lock(s, a);
-	gpr.FlushLockX(ECX, EDX);
-	MOV(32, R(EDX), gpr.R(a));
-	MOV(32, R(ECX), gpr.R(s));
-	if (offset)
-		ADD(32, R(EDX), Imm32((u32)offset));
-	if (update && offset)
-	{
-		gpr.LoadToX64(a, true, true);
-		MOV(32, gpr.R(a), R(EDX));
-	}
-	TEST(32, R(EDX), Imm32(0x0C000000));
-	FixupBranch unsafe_addr = J_CC(CC_NZ);
-	BSWAP(accessSize, ECX);
-#ifdef _M_X64
-	MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
-#else
-	AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
-	MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
-#endif
-	FixupBranch skip_call = J();
-	SetJumpTarget(unsafe_addr);
-	switch (accessSize)
-	{
-	case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
-	case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
-	case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
-	}
-	SetJumpTarget(skip_call);
+	gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
+	gpr.Lock(a);
+	MOV(32, R(ABI_PARAM2), gpr.R(a));
+	MOV(32, R(ABI_PARAM1), gpr.R(s));
+	SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, accessSize, offset);
+
+	if (update && offset)
+	{
+		MEMCHECK_START
+
+		gpr.KillImmediate(a);
+		MOV(32, gpr.R(a), R(ABI_PARAM2));
+
+		MEMCHECK_END
+	}
+
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
 }
@@ -459,9 +450,14 @@ void Jit64::stXx(UGeckoInstruction inst)
 	MOV(32, R(ECX), gpr.R(s));
 	SafeWriteRegToReg(ECX, EDX, accessSize, 0);

+	//MEMCHECK_START
+
+	// TODO: Insert rA update code here
+
+	//MEMCHECK_END
+
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
-	return;
 }

 // A few games use these heavily in video codecs.

View File

@@ -63,7 +63,6 @@ void Jit64::lfs(UGeckoInstruction inst)
 	}
 	s32 offset = (s32)(s16)inst.SIMM_16;
 	gpr.FlushLockX(ABI_PARAM1);
-	gpr.Lock(a);
 	MOV(32, R(ABI_PARAM1), gpr.R(a));
 	if (jo.assumeFPLoadFromMem)
 	{
@@ -74,12 +73,16 @@ void Jit64::lfs(UGeckoInstruction inst)
 		SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
 	}

+	MEMCHECK_START
+
 	MOV(32, M(&temp32), R(EAX));
 	fpr.Lock(d);
 	fpr.LoadToX64(d, false);
 	CVTSS2SD(fpr.RX(d), M(&temp32));
 	MOVDDUP(fpr.RX(d), fpr.R(d));
-	gpr.UnlockAll();
+
+	MEMCHECK_END
+
 	gpr.UnlockAllX();
 	fpr.UnlockAll();
 }
@@ -90,6 +93,8 @@ void Jit64::lfd(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+
+	if (js.memcheck) { Default(inst); return; }

 	int d = inst.RD;
 	int a = inst.RA;
 	if (!a)
@@ -119,18 +124,28 @@ void Jit64::lfd(UGeckoInstruction inst)
 	MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
 	BSWAP(64, EAX);
 	MOV(64, M(&temp64), R(EAX));
+
+	MEMCHECK_START
+
 	MOVSD(XMM0, M(&temp64));
 	MOVSD(xd, R(XMM0));
+
+	MEMCHECK_END
+
 #else
 	AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
 	MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
 	BSWAP(32, EAX);
 	MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));
+
+	MEMCHECK_START
+
 	MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
 	BSWAP(32, EAX);
 	MOV(32, M(&temp64), R(EAX));
 	MOVSD(XMM0, M(&temp64));
 	MOVSD(xd, R(XMM0));
+
+	MEMCHECK_END
+
 #if 0
 	// Alternate implementation; possibly faster
 	AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
@@ -156,6 +171,8 @@ void Jit64::stfd(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+
+	if (js.memcheck) { Default(inst); return; }

 	int s = inst.RS;
 	int a = inst.RA;
 	if (!a)
@@ -207,18 +224,28 @@ void Jit64::stfd(UGeckoInstruction inst)
 #ifdef _M_X64
 		fpr.LoadToX64(s, true, false);
 		MOVSD(M(&temp64), fpr.RX(s));
+
+		MEMCHECK_START
+
 		MOV(64, R(EAX), M(&temp64));
 		BSWAP(64, EAX);
 		MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, 0), R(EAX));
+
+		MEMCHECK_END
+
 #else
 		fpr.LoadToX64(s, true, false);
 		MOVSD(M(&temp64), fpr.RX(s));
+
+		MEMCHECK_START
+
 		MOV(32, R(EAX), M(&temp64));
 		BSWAP(32, EAX);
 		MOV(32, MDisp(ABI_PARAM1, (u32)Memory::base + 4), R(EAX));
 		MOV(32, R(EAX), M((void*)((u8 *)&temp64 + 4)));
 		BSWAP(32, EAX);
 		MOV(32, MDisp(ABI_PARAM1, (u32)Memory::base), R(EAX));
+
+		MEMCHECK_END
+
 #endif
 	}
 	SetJumpTarget(quit);
@@ -233,6 +260,8 @@ void Jit64::stfs(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+
+	if (js.memcheck) { Default(inst); return; }

 	bool update = inst.OPCD & 1;
 	int s = inst.RS;
 	int a = inst.RA;
@@ -287,6 +316,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+
+	if (js.memcheck) { Default(inst); return; }

 	// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
 	gpr.FlushLockX(ABI_PARAM1);
 	fpr.Lock(inst.RS);
@@ -295,7 +326,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
 	ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
 	CVTSD2SS(XMM0, fpr.R(inst.RS));
 	MOVD_xmm(R(EAX), XMM0);
-	UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
+	SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
+
 	gpr.UnlockAllX();
 	fpr.UnlockAll();
 }
@@ -306,12 +338,14 @@ void Jit64::lfsx(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)

-	fpr.Lock(inst.RS);
-	fpr.LoadToX64(inst.RS, false, true);
 	MOV(32, R(EAX), gpr.R(inst.RB));
 	if (inst.RA)
+	{
 		ADD(32, R(EAX), gpr.R(inst.RA));
-	if (cpu_info.bSSSE3) {
+	}
+	if (cpu_info.bSSSE3 && !js.memcheck) {
+		fpr.Lock(inst.RS);
+		fpr.LoadToX64(inst.RS, false, true);
 		X64Reg r = fpr.R(inst.RS).GetSimpleReg();
 #ifdef _M_IX86
 		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
@@ -319,14 +353,25 @@ void Jit64::lfsx(UGeckoInstruction inst)
 #else
 		MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
 #endif
+
+		MEMCHECK_START
+
 		PSHUFB(r, M((void *)bswapShuffle1x4));
 		CVTSS2SD(r, R(r));
 		MOVDDUP(r, R(r));
+
+		MEMCHECK_END
+
 	} else {
-		UnsafeLoadRegToReg(EAX, EAX, 32, false);
+		SafeLoadRegToEAX(EAX, 32, false);
+
+		MEMCHECK_START
+
 		MOV(32, M(&temp32), R(EAX));
 		CVTSS2SD(XMM0, M(&temp32));
+		fpr.Lock(inst.RS);
+		fpr.LoadToX64(inst.RS, false, true);
 		MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
+
+		MEMCHECK_END
 	}
 	fpr.UnlockAll();
 }

View File

@@ -54,6 +54,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStorePaired)
+
+	if (js.memcheck) { Default(inst); return; }

 	if (!inst.RA)
 	{
 		// TODO: Support these cases if it becomes necessary.
@@ -136,6 +138,8 @@ void Jit64::psq_l(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(LoadStorePaired)
+
+	if (js.memcheck) { Default(inst); return; }

 	if (!inst.RA)
 	{
 		Default(inst);
@@ -174,7 +178,13 @@ void Jit64::psq_l(UGeckoInstruction inst)
 	ABI_AlignStack(0);
 	CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized));
 	ABI_RestoreStack(0);
+
+	// MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access
+
 	CVTPS2PD(fpr.RX(inst.RS), R(XMM0));
+
+	// MEMCHECK_END
+
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
 }

View File

@@ -465,7 +465,6 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
 }

 static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
-	bool win32 = false;
 	if (RI.UseProfile) {
 		unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++];
 		if (!(curLoad & 0x0C000000)) {
@@ -486,15 +485,8 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
 	if (RI.MakeProfile) {
 		RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
 	}
-#ifdef _M_IX86
-	win32 = true;
-#endif
-	FixupBranch argh;
-	if (!(win32 && SConfig::GetInstance().m_LocalCoreStartupParameter.iTLBHack == 1))
-	{
-		RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000));
-		argh = RI.Jit->J_CC(CC_Z);
-	}
+	RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000));
+	FixupBranch argh = RI.Jit->J_CC(CC_Z);

 	// Slow safe read using Memory::Read_Ux routines
 #ifdef _M_IX86 // we don't allocate EAX on x64 so no reason to save it.
@@ -514,14 +506,10 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
 		RI.Jit->POP(32, R(EAX));
 #endif
 	}
-	if (!(win32 && SConfig::GetInstance().m_LocalCoreStartupParameter.iTLBHack == 1))
-	{
-		FixupBranch arg2 = RI.Jit->J();
-		// Fast unsafe read using memory pointer EBX
-		RI.Jit->SetJumpTarget(argh);
-		RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false);
-		RI.Jit->SetJumpTarget(arg2);
-	}
+	FixupBranch arg2 = RI.Jit->J();
+	RI.Jit->SetJumpTarget(argh);
+	RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false);
+	RI.Jit->SetJumpTarget(arg2);
 	if (regReadUse(RI, I))
 		RI.regs[reg] = I;
 }

View File

@@ -184,7 +184,7 @@ void JitIL::Init()
 #else
 	jo.enableFastMem = false;
 #endif
-	jo.assumeFPLoadFromMem = true;
+	jo.assumeFPLoadFromMem = Core::g_CoreStartupParameter.bUseFastMem;
 	jo.fpAccurateFcmp = false;
 	jo.optimizeGatherPipe = true;
 	jo.fastInterrupts = false;

View File

@@ -39,6 +39,7 @@ void JitIL::lhax(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -50,7 +51,8 @@ void JitIL::lhax(UGeckoInstruction inst)
 void JitIL::lXz(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -81,6 +83,7 @@ void JitIL::lha(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr =
 		ibuild.EmitIntConst((s32)(s16)inst.SIMM_16);
 	if (inst.RA)
@@ -94,6 +97,7 @@ void JitIL::lXzx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
 	if (inst.RA) {
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -141,6 +145,7 @@ void JitIL::stX(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
 		value = ibuild.EmitLoadGReg(inst.RS);
 	if (inst.RA)
@@ -160,6 +165,7 @@ void JitIL::stXx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB),
 		value = ibuild.EmitLoadGReg(inst.RS);
 	addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -179,6 +185,7 @@ void JitIL::lmw(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -194,6 +201,7 @@ void JitIL::stmw(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStore)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));

View File

@@ -43,6 +43,7 @@ void JitIL::lfs(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), val;
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -56,6 +57,7 @@ void JitIL::lfd(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), val;
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@@ -70,6 +72,7 @@ void JitIL::stfd(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
 		val = ibuild.EmitLoadFReg(inst.RS);
 	if (inst.RA)
@@ -85,6 +88,7 @@ void JitIL::stfs(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
 		val = ibuild.EmitLoadFReg(inst.RS);
 	if (inst.RA)
@@ -101,6 +105,7 @@ void JitIL::stfsx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB),
 		val = ibuild.EmitLoadFReg(inst.RS);
 	if (inst.RA)
@@ -115,6 +120,7 @@ void JitIL::lfsx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStoreFloating)
+	if (js.memcheck) { Default(inst); return; }
 	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB), val;
 	if (inst.RA)
 		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));

View File

@@ -37,6 +37,7 @@ void JitIL::psq_st(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStorePaired)
+	if (js.memcheck) { Default(inst); return; }
 	if (inst.W) {Default(inst); return;}
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
 	if (inst.RA)
@@ -52,6 +53,7 @@ void JitIL::psq_l(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(LoadStorePaired)
+	if (js.memcheck) { Default(inst); return; }
 	if (inst.W) {Default(inst); return;}
 	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
 	if (inst.RA)

View File

@@ -31,15 +31,9 @@
 using namespace Gen;

-void EmuCodeBlock::JitClearCA()
-{
-	AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
-}
-
-void EmuCodeBlock::JitSetCA()
-{
-	OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
-}
+static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+static u32 GC_ALIGNED16(float_buffer);

 void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
 {
@@ -60,7 +54,9 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc
 			SAR(32, R(reg_value), Imm8(16));
 		else
 			SHR(32, R(reg_value), Imm8(16));
-	} else if (signExtend) {
+	}
+	else if (signExtend)
+	{
 		// TODO: bake 8-bit into the original load.
 		MOVSX(32, accessSize, reg_value, R(reg_value));
 	}
@@ -76,26 +72,53 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
 #endif
 }

-void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
-{
-	if (offset)
-		ADD(32, R(reg), Imm32((u32)offset));
-	TEST(32, R(reg), Imm32(0x0C000000));
-	FixupBranch argh = J_CC(CC_Z);
-	switch (accessSize)
-	{
-	case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
-	case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), reg); break;
-	case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), reg); break;
-	}
-	if (signExtend && accessSize < 32) {
-		// Need to sign extend values coming from the Read_U* functions.
-		MOVSX(32, accessSize, EAX, R(EAX));
-	}
-	FixupBranch arg2 = J();
-	SetJumpTarget(argh);
-	UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend);
-	SetJumpTarget(arg2);
-}
+void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend)
+{
+	if (Core::g_CoreStartupParameter.bUseFastMem && accessSize == 32 && !Core::g_CoreStartupParameter.bMMU)
+	{
+		UnsafeLoadRegToReg(reg_addr, EAX, accessSize, offset, signExtend);
+	}
+	else
+	{
+		if (offset)
+			ADD(32, R(reg_addr), Imm32((u32)offset));
+
+		FixupBranch addrf0;
+		FixupBranch addr20;
+		if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+		{
+			CMP(32, R(reg_addr), Imm32(0xf0000000));
+			addrf0 = J_CC(CC_GE);
+			TEST(32, R(reg_addr), Imm32(0x20000000));
+			addr20 = J_CC(CC_NZ);
+		}
+		TEST(32, R(reg_addr), Imm32(0x0C000000));
+		FixupBranch fast = J_CC(CC_Z);
+		if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+		{
+			SetJumpTarget(addr20);
+			SetJumpTarget(addrf0);
+		}
+		switch (accessSize)
+		{
+		case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg_addr); break;
+		case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), reg_addr); break;
+		case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), reg_addr); break;
+		}
+		if (signExtend && accessSize < 32)
+		{
+			// Need to sign extend values coming from the Read_U* functions.
+			MOVSX(32, accessSize, EAX, R(EAX));
+		}
+		FixupBranch exit = J();
+		SetJumpTarget(fast);
+		UnsafeLoadRegToReg(reg_addr, EAX, accessSize, 0, signExtend);
+		SetJumpTarget(exit);
+	}
+}

 void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
@@ -116,23 +139,47 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac
 void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
 {
 	if (offset)
-		ADD(32, R(reg_addr), Imm32(offset));
-	TEST(32, R(reg_addr), Imm32(0x0C000000));
-	FixupBranch argh = J_CC(CC_Z);
+		ADD(32, R(reg_addr), Imm32((u32)offset));
+
+	// TODO: Figure out a cleaner way to check memory bounds
+	FixupBranch addrf0;
+	FixupBranch addr20;
+	FixupBranch fast;
+	if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+	{
+		CMP(32, R(reg_addr), Imm32(0xf0000000));
+		addrf0 = J_CC(CC_GE);
+		TEST(32, R(reg_addr), Imm32(0x20000000));
+		addr20 = J_CC(CC_NZ);
+	}
+	if (!Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bUseFastMem)
+	{
+		TEST(32, R(reg_addr), Imm32(0x0C000000));
+		fast = J_CC(CC_Z);
+	}
+	if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
+	{
+		SetJumpTarget(addr20);
+		SetJumpTarget(addrf0);
+	}
 	switch (accessSize)
 	{
 	case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr); break;
 	case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr); break;
 	case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break;
 	}
-	FixupBranch arg2 = J();
-	SetJumpTarget(argh);
-	UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
-	SetJumpTarget(arg2);
-}
-
-static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-static u32 GC_ALIGNED16(float_buffer);
+	FixupBranch exit = J();
+	if (!Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bUseFastMem)
+	{
+		SetJumpTarget(fast);
+	}
+	UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
+	SetJumpTarget(exit);
+}

 void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
 {
@@ -147,12 +194,12 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr)
 		FixupBranch arg2 = J();
 		SetJumpTarget(argh);
 		PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));
 #ifdef _M_IX86
 		AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
 		MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value);
 #else
 		MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
 #endif
 		SetJumpTarget(arg2);
 	} else {
 		MOVSS(M(&float_buffer), xmm_value);
@@ -197,3 +244,13 @@ void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
 		CVTPS2PD(xmm, R(xmm));
 	}
 }
+
+void EmuCodeBlock::JitClearCA()
+{
+	AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
+}
+
+void EmuCodeBlock::JitSetCA()
+{
+	OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
+}