diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index 6e230c7fa7..0302ee78c8 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -37,7 +37,6 @@ else() if(NOT _M_GENERIC) #X86 set(SRCS ${SRCS} Src/x64FPURoundMode.cpp - Src/x64Thunk.cpp ) endif() set(SRCS ${SRCS} Src/x64CPUDetect.cpp) diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj index 4066febcca..20cf92dfc3 100644 --- a/Source/Core/Common/Common.vcxproj +++ b/Source/Core/Common/Common.vcxproj @@ -217,7 +217,6 @@ - @@ -263,7 +262,6 @@ - diff --git a/Source/Core/Common/Common.vcxproj.filters b/Source/Core/Common/Common.vcxproj.filters index a912cc3d8a..05b9edcb14 100644 --- a/Source/Core/Common/Common.vcxproj.filters +++ b/Source/Core/Common/Common.vcxproj.filters @@ -47,7 +47,6 @@ - @@ -84,7 +83,6 @@ - diff --git a/Source/Core/Common/Src/Thunk.h b/Source/Core/Common/Src/Thunk.h deleted file mode 100644 index b1487badf8..0000000000 --- a/Source/Core/Common/Src/Thunk.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#ifndef _THUNK_H_ -#define _THUNK_H_ - -#include - -#include "Common.h" -#include "x64Emitter.h" - -// This simple class creates a wrapper around a C/C++ function that saves all fp state -// before entering it, and restores it upon exit. This is required to be able to selectively -// call functions from generated code, without inflicting the performance hit and increase -// of complexity that it means to protect the generated code from this problem. - -// This process is called thunking. - -// There will only ever be one level of thunking on the stack, plus, -// we don't want to pollute the stack, so we store away regs somewhere global. -// NOT THREAD SAFE. This may only be used from the CPU thread. -// Any other thread using this stuff will be FATAL. - -class ThunkManager : public Gen::XCodeBlock -{ - std::map thunks; - - const u8 *save_regs; - const u8 *load_regs; - -public: - ThunkManager() { - Init(); - } - ~ThunkManager() { - Shutdown(); - } - void *ProtectFunction(void *function, int num_params); -private: - void Init(); - void Shutdown(); - void Reset(); -}; - -#endif // _THUNK_H_ diff --git a/Source/Core/Common/Src/x64ABI.cpp b/Source/Core/Common/Src/x64ABI.cpp index 54d19c7775..3750636c3e 100644 --- a/Source/Core/Common/Src/x64ABI.cpp +++ b/Source/Core/Common/Src/x64ABI.cpp @@ -57,6 +57,86 @@ void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) { } } +void XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog) +{ + int regSize = +#ifdef _M_X64 + 8; +#else + 4; +#endif + int shadow = 0; +#if defined(_WIN32) && defined(_M_X64) + shadow = 0x20; +#endif + int count = 0; + for (int r = 0; r < 16; r++) + { + if (mask & (1 << r)) + { + PUSH((X64Reg) r); + count++; + } + } + int size = ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf; + for (int x = 0; x < 16; x++) + { + if (mask & (1 << (16 + x))) + size += 16; + } + size += shadow; + if (size) + SUB(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); + int offset = shadow; + for (int x = 0; x < 16; x++) + { + if (mask & (1 << (16 + x))) + { + MOVAPD(MDisp(RSP, offset), (X64Reg) x); + offset += 16; + } + } +} + +void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog) +{ + int regSize = +#ifdef _M_X64 + 8; +#else + 4; +#endif + int size = 0; +#if defined(_WIN32) && defined(_M_X64) + size += 0x20; +#endif + for (int x = 0; x < 16; x++) + { + if (mask & (1 << (16 + x))) + { + MOVAPD((X64Reg) x, MDisp(RSP, size)); + size += 16; + } + } + int count = 0; + for (int r = 0; r < 16; r++) + { + if (mask & (1 << r)) + count++; + } + size += ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf; + + if (size) + ADD(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); + for (int r = 15; r >= 0; r--) + { + if (mask & (1 << r)) + { + POP((X64Reg) r); + } + } +} + #ifdef _M_IX86 // All32 // Shared code between Win32 and Unix32 diff --git a/Source/Core/Common/Src/x64ABI.h b/Source/Core/Common/Src/x64ABI.h index 837e4ec3d8..4b10d11e54 100644 --- a/Source/Core/Common/Src/x64ABI.h +++ b/Source/Core/Common/Src/x64ABI.h @@ -43,6 +43,8 @@ // 32-bit bog standard cdecl, shared between linux and windows // MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about. +#define ALL_CALLEE_SAVED ((1 << EAX) | (1 << ECX) | (1 << EDX)) + #else // 64 bit calling convention #ifdef _WIN32 // 64-bit Windows - the really exotic calling convention @@ -52,7 +54,12 @@ #define ABI_PARAM3 R8 #define ABI_PARAM4 R9 -#else //64-bit Unix (hopefully MacOSX too) +#define ABI_ALL_CALLEE_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << R8) | \ + (1 << R9) | (1 << R10) | (1 << R11) | \ + (1 << XMM0) | (1 << XMM1) | (1 << XMM2) | (1 << XMM3) | \ + (1 << XMM4) | (1 << XMM5)) + +#else //64-bit Unix / OS X #define ABI_PARAM1 RDI #define ABI_PARAM2 RSI @@ -61,6 +68,10 @@ #define ABI_PARAM5 R8 #define ABI_PARAM6 R9 +#define ABI_ALL_CALLEE_SAVED ((1 << RAX) | (1 << RCX) | (1 << RDX) | (1 << RDI) | \ + (1 << RSI) | (1 << R8) | (1 << R9) | (1 << R10) | (1 << R11) | \ + 0xffff0000 /* xmm0..15 */) + #endif // WIN32 #endif // X86 diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 9b6731856f..a5d7cb2f0f 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -1634,74 +1634,6 @@ void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u CALLptr(M(impptr)); } -void XEmitter::PushRegistersAndAlignStack(u32 mask) -{ - int shadow = 0; -#ifdef _WIN32 - shadow = 0x20; -#endif - int count = 0; - for (int r = 0; r < 16; r++) - { - if (mask & (1 << r)) - { - PUSH((X64Reg) r); - count++; - } - } - int size = (count & 1) ? 0 : 8; - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - size += 16; - } - size += shadow; - if (size) - SUB(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); - int offset = shadow; - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - { - MOVAPD(MDisp(RSP, offset), (X64Reg) x); - offset += 16; - } - } -} - -void XEmitter::PopRegistersAndAlignStack(u32 mask) -{ - int size = 0; -#ifdef _WIN32 - size += 0x20; -#endif - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - { - MOVAPD((X64Reg) x, MDisp(RSP, size)); - size += 16; - } - } - int count = 0; - for (int r = 0; r < 16; r++) - { - if (mask & (1 << r)) - count++; - } - size += (count & 1) ? 0 : 8; - - if (size) - ADD(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); - for (int r = 15; r >= 0; r--) - { - if (mask & (1 << r)) - { - POP((X64Reg) r); - } - } -} - #endif } diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 87724a8092..94938b290a 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -646,6 +646,10 @@ public: void ABI_PushAllCalleeSavedRegsAndAdjustStack(); void ABI_PopAllCalleeSavedRegsAndAdjustStack(); + // A more flexible version of the above. + void ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog); + void ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog); + unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false); void ABI_AlignStack(unsigned int frameSize, bool noProlog = false); void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false); @@ -691,9 +695,6 @@ public: #define DECLARE_IMPORT(x) extern "C" void *__imp_##x - void PushRegistersAndAlignStack(u32 mask); - void PopRegistersAndAlignStack(u32 mask); - #endif }; // class XEmitter diff --git a/Source/Core/Common/Src/x64Thunk.cpp b/Source/Core/Common/Src/x64Thunk.cpp deleted file mode 100644 index d77d78e40e..0000000000 --- a/Source/Core/Common/Src/x64Thunk.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include - -#include "Common.h" -#include "MemoryUtil.h" -#include "x64ABI.h" -#include "Thunk.h" - -#define THUNK_ARENA_SIZE 1024*1024*1 - -namespace -{ - -static u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]); -static u8 GC_ALIGNED32(saved_gpr_state[16 * 8]); -static u16 saved_mxcsr; - -} // namespace - -using namespace Gen; - -void ThunkManager::Init() -{ - AllocCodeSpace(THUNK_ARENA_SIZE); - save_regs = GetCodePtr(); - for (int i = 2; i < ABI_GetNumXMMRegs(); i++) - MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i)); - STMXCSR(M(&saved_mxcsr)); -#ifdef _M_X64 - MOV(64, M(saved_gpr_state + 0 ), R(RCX)); - MOV(64, M(saved_gpr_state + 8 ), R(RDX)); - MOV(64, M(saved_gpr_state + 16), R(R8) ); - MOV(64, M(saved_gpr_state + 24), R(R9) ); - MOV(64, M(saved_gpr_state + 32), R(R10)); - MOV(64, M(saved_gpr_state + 40), R(R11)); -#ifndef _WIN32 - MOV(64, M(saved_gpr_state + 48), R(RSI)); - MOV(64, M(saved_gpr_state + 56), R(RDI)); -#endif - MOV(64, M(saved_gpr_state + 64), R(RBX)); -#else - MOV(32, M(saved_gpr_state + 0 ), R(RCX)); - MOV(32, M(saved_gpr_state + 4 ), R(RDX)); -#endif - RET(); - load_regs = GetCodePtr(); - LDMXCSR(M(&saved_mxcsr)); - for (int i = 2; i < ABI_GetNumXMMRegs(); i++) - MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16)); -#ifdef _M_X64 - MOV(64, R(RCX), M(saved_gpr_state + 0 )); - MOV(64, R(RDX), M(saved_gpr_state + 8 )); - MOV(64, R(R8) , M(saved_gpr_state + 16)); - MOV(64, R(R9) , M(saved_gpr_state + 24)); - MOV(64, R(R10), M(saved_gpr_state + 32)); - MOV(64, R(R11), M(saved_gpr_state + 40)); -#ifndef _WIN32 - MOV(64, R(RSI), M(saved_gpr_state + 48)); - MOV(64, R(RDI), M(saved_gpr_state + 56)); -#endif - MOV(64, R(RBX), M(saved_gpr_state + 64)); -#else - MOV(32, R(RCX), M(saved_gpr_state + 0 )); - MOV(32, R(RDX), M(saved_gpr_state + 4 )); -#endif - RET(); -} - -void ThunkManager::Reset() -{ - thunks.clear(); - ResetCodePtr(); -} - -void ThunkManager::Shutdown() -{ - Reset(); - FreeCodeSpace(); -} - -void *ThunkManager::ProtectFunction(void *function, int num_params) -{ - std::map::iterator iter; - iter = thunks.find(function); - if (iter != thunks.end()) - return (void *)iter->second; - if (!region) - PanicAlert("Trying to protect functions before the emu is started. Bad bad bad."); - - const u8 *call_point = GetCodePtr(); -#ifdef _M_X64 - // Make sure to align stack. - ABI_AlignStack(0, true); - CALL((void*)save_regs); - CALL((void*)function); - CALL((void*)load_regs); - ABI_RestoreStack(0, true); - RET(); -#else - CALL((void*)save_regs); - // Since parameters are in the previous stack frame, not in registers, this takes some - // trickery : we simply re-push the parameters. might not be optimal, but that doesn't really - // matter. - ABI_AlignStack(num_params * 4, true); - unsigned int alignedSize = ABI_GetAlignedFrameSize(num_params * 4, true); - for (int i = 0; i < num_params; i++) { - // ESP is changing, so we do not need i - PUSH(32, MDisp(ESP, alignedSize)); - } - CALL(function); - ABI_RestoreStack(num_params * 4, true); - CALL((void*)load_regs); - RET(); -#endif - - thunks[function] = call_point; - return (void *)call_point; -} diff --git a/Source/Core/Core/Src/ConfigManager.cpp b/Source/Core/Core/Src/ConfigManager.cpp index 84afc54a6d..fd328e12e2 100644 --- a/Source/Core/Core/Src/ConfigManager.cpp +++ b/Source/Core/Core/Src/ConfigManager.cpp @@ -409,7 +409,6 @@ void SConfig::LoadSettings() ini.Get("Core", "SlotB", (int*)&m_EXIDevice[1], EXIDEVICE_NONE); ini.Get("Core", "SerialPort1", (int*)&m_EXIDevice[2], EXIDEVICE_NONE); ini.Get("Core", "BBA_MAC", &m_bba_mac); - ini.Get("Core", "ProfiledReJIT",&m_LocalCoreStartupParameter.bJITProfiledReJIT, false); ini.Get("Core", "TimeProfiling",&m_LocalCoreStartupParameter.bJITILTimeProfiling, false); ini.Get("Core", "OutputIR", &m_LocalCoreStartupParameter.bJITILOutputIR, false); char sidevicenum[16]; diff --git a/Source/Core/Core/Src/CoreParameter.cpp b/Source/Core/Core/Src/CoreParameter.cpp index c22f5a15ba..4cba012ea4 100644 --- a/Source/Core/Core/Src/CoreParameter.cpp +++ b/Source/Core/Core/Src/CoreParameter.cpp @@ -28,7 +28,7 @@ SCoreStartupParameter::SCoreStartupParameter() bJITLoadStoreFloatingOff(false), bJITLoadStorePairedOff(false), bJITFloatingPointOff(false), bJITIntegerOff(false), bJITPairedOff(false), bJITSystemRegistersOff(false), - bJITBranchOff(false), bJITProfiledReJIT(false), + bJITBranchOff(false), bJITILTimeProfiling(false), bJITILOutputIR(false), bEnableFPRF(false), bCPUThread(true), bDSPThread(false), bDSPHLE(true), diff --git a/Source/Core/Core/Src/CoreParameter.h b/Source/Core/Core/Src/CoreParameter.h index 9821580c84..caef1d4bbb 100644 --- a/Source/Core/Core/Src/CoreParameter.h +++ b/Source/Core/Core/Src/CoreParameter.h @@ -111,7 +111,6 @@ struct SCoreStartupParameter bool bJITPairedOff; bool bJITSystemRegistersOff; bool bJITBranchOff; - bool bJITProfiledReJIT; bool bJITILTimeProfiling; bool bJITILOutputIR; diff --git a/Source/Core/Core/Src/HW/HW.cpp b/Source/Core/Core/Src/HW/HW.cpp index cef3666d59..586344ecc6 100644 --- a/Source/Core/Core/Src/HW/HW.cpp +++ b/Source/Core/Core/Src/HW/HW.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "Common.h" -#include "Thunk.h" #include "../Core.h" #include "HW.h" #include "../PowerPC/PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index f2bff458ae..2b14810b3e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -12,7 +12,6 @@ #include "Common.h" #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "../../HLE/HLE.h" #include "../../Core.h" #include "../../PatchEngine.h" @@ -552,7 +551,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc { js.fifoBytesThisBlock -= 32; MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write - ABI_CallFunction(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); + u32 registersInUse = RegistersInUse(); + ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); + ABI_PopRegistersAndAdjustStack(registersInUse, false); } u32 function = HLE::GetFunctionIndex(ops[i].address); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index bb1c3a4a19..13ec88e0e1 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -234,6 +234,4 @@ public: void icbi(UGeckoInstruction inst); }; -void ProfiledReJit(); - #endif // _JIT64_H diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp index 310deaa9c2..2dba16cff3 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index a94b6977b1..37d274fcc8 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -6,7 +6,6 @@ // Should give a very noticable speed boost to paired single heavy code. #include "Common.h" -#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" @@ -120,21 +119,20 @@ void Jit64::lXXx(UGeckoInstruction inst) // do our job at first s32 offset = (s32)(s16)inst.SIMM_16; - gpr.Lock(d); - SafeLoadToEAX(gpr.R(a), accessSize, offset, RegistersInUse(), signExtend); - gpr.KillImmediate(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - gpr.UnlockAll(); + gpr.BindToRegister(d, false, true); + SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, RegistersInUse(), signExtend); - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - // if it's still 0, we can wait until the next event - TEST(32, R(EAX), R(EAX)); + TEST(32, gpr.R(d), gpr.R(d)); FixupBranch noIdle = J_CC(CC_NZ); + u32 registersInUse = RegistersInUse(); + ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); + ABI_PopRegistersAndAdjustStack(registersInUse, false); + // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 //MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC)); WriteExceptionExit(); @@ -174,18 +172,32 @@ void Jit64::lXXx(UGeckoInstruction inst) { if ((inst.OPCD != 31) && gpr.R(a).IsImm()) { - opAddress = Imm32((u32)gpr.R(a).offset + (s32)inst.SIMM_16); + u32 val = (u32)gpr.R(a).offset + (s32)inst.SIMM_16; + opAddress = Imm32(val); + if (update) + gpr.SetImmediate32(a, val); } else if ((inst.OPCD == 31) && gpr.R(a).IsImm() && gpr.R(b).IsImm()) { - opAddress = Imm32((u32)gpr.R(a).offset + (u32)gpr.R(b).offset); + u32 val = (u32)gpr.R(a).offset + (u32)gpr.R(b).offset; + opAddress = Imm32(val); + if (update) + gpr.SetImmediate32(a, val); } else { - gpr.FlushLockX(ABI_PARAM1); - opAddress = R(ABI_PARAM1); - MOV(32, opAddress, gpr.R(a)); - + if (update || (inst.OPCD != 31 && inst.SIMM_16 == 0)) + { + gpr.BindToRegister(a, true, update); + opAddress = gpr.R(a); + } + else + { + gpr.FlushLockX(ABI_PARAM1); + opAddress = R(ABI_PARAM1); + MOV(32, opAddress, gpr.R(a)); + } + if (inst.OPCD == 31) ADD(32, opAddress, gpr.R(b)); else @@ -193,29 +205,9 @@ void Jit64::lXXx(UGeckoInstruction inst) } } - SafeLoadToEAX(opAddress, accessSize, 0, RegistersInUse(), signExtend); - - // We must flush immediate values from the following registers because - // they may change at runtime if no MMU exception has been raised - gpr.KillImmediate(d, true, true); - if (update) - { - gpr.Lock(a); - gpr.BindToRegister(a, true, true); - } - - MEMCHECK_START - - if (update) - { - if (inst.OPCD == 31) - ADD(32, gpr.R(a), gpr.R(b)); - else - ADD(32, gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); - } - MOV(32, gpr.R(d), R(EAX)); - - MEMCHECK_END + gpr.Lock(a, b, d); + gpr.BindToRegister(d, false, true); + SafeLoadToReg(gpr.RX(d), opAddress, accessSize, 0, RegistersInUse(), signExtend); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -318,12 +310,15 @@ void Jit64::stX(UGeckoInstruction inst) else { MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write + u32 registersInUse = RegistersInUse(); + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), gpr.R(s), addr); break; - case 16: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), gpr.R(s), addr); break; - case 8: ABI_CallFunctionAC(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), gpr.R(s), addr); break; + case 32: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr); break; + case 16: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr); break; + case 8: ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); if (update) gpr.SetImmediate32(a, addr); return; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index e8d4465ebb..ab8e417792 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -50,7 +50,7 @@ void Jit64::lfs(UGeckoInstruction inst) } s32 offset = (s32)(s16)inst.SIMM_16; - SafeLoadToEAX(gpr.R(a), 32, offset, RegistersInUse(), false); + SafeLoadToReg(EAX, gpr.R(a), 32, offset, RegistersInUse(), false); MEMCHECK_START @@ -209,6 +209,7 @@ void Jit64::stfd(UGeckoInstruction inst) MOVD_xmm(R(EAX), XMM0); SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0))); + MOVAPD(XMM0, fpr.R(s)); MOVD_xmm(R(EAX), XMM0); LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset)); SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse()); @@ -338,7 +339,7 @@ void Jit64::lfsx(UGeckoInstruction inst) MEMCHECK_END } else { - SafeLoadToEAX(R(EAX), 32, 0, RegistersInUse(), false); + SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false); MEMCHECK_START diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 00066f4746..4548890e2c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -7,7 +7,6 @@ #include "Common.h" -#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" #include "../../HW/GPFifo.h" @@ -106,15 +105,11 @@ void Jit64::psq_st(UGeckoInstruction inst) // One value XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. CVTSD2SS(XMM0, fpr.R(s)); - ABI_AlignStack(0); CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized)); - ABI_RestoreStack(0); } else { // Pair of values CVTPD2PS(XMM0, fpr.R(s)); - ABI_AlignStack(0); CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.pairedStoreQuantized)); - ABI_RestoreStack(0); } gpr.UnlockAll(); gpr.UnlockAllX(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index e316ae212c..92804a304c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -11,7 +11,6 @@ #include "../PPCTables.h" #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "Jit.h" #include "JitRegCache.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp index 82beace052..6ffdbad7ad 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp @@ -123,7 +123,6 @@ Fix profiled loads/stores to work safely. On 32-bit, one solution is to #include "IR.h" #include "../PPCTables.h" #include "../../CoreTiming.h" -#include "Thunk.h" #include "../../HW/Memmap.h" #include "JitILAsm.h" #include "JitIL.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index ecd44134fa..6eff9056eb 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -27,7 +27,6 @@ The register allocation is linear scan allocation. #include "IR.h" #include "../PPCTables.h" #include "../../CoreTiming.h" -#include "Thunk.h" #include "../../HW/Memmap.h" #include "JitILAsm.h" #include "JitIL.h" @@ -39,8 +38,6 @@ The register allocation is linear scan allocation. #include "../../Core.h" #include "HW/ProcessorInterface.h" -static ThunkManager thunks; - using namespace IREmitter; using namespace Gen; @@ -56,9 +53,6 @@ struct RegInfo { InstLoc fregs[MAX_NUMBER_OF_REGS]; unsigned numSpills; unsigned numFSpills; - bool MakeProfile; - bool UseProfile; - unsigned numProfiledLoads; unsigned exitNumber; RegInfo(JitIL* j, InstLoc f, unsigned insts) : Jit(j), FirstI(f), IInfo(insts), lastUsed(insts) { @@ -68,9 +62,7 @@ struct RegInfo { } numSpills = 0; numFSpills = 0; - numProfiledLoads = 0; exitNumber = 0; - MakeProfile = UseProfile = false; } private: @@ -106,7 +98,6 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) { } static unsigned SlotSet[1000]; -static unsigned ProfiledLoads[1000]; static u8 GC_ALIGNED16(FSlotSet[16*1000]); static OpArg regLocForSlot(RegInfo& RI, unsigned slot) { @@ -440,47 +431,15 @@ static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum regMarkUse(RI, I, AI, OpNum); } -static void regClearDeadMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) { - if (!(RI.IInfo[I - RI.FirstI] & (2 << OpNum))) - return; - if (isImm(*AI)) { - unsigned addr = RI.Build->GetImmValue(AI); - if (Memory::IsRAMAddress(addr)) { - return; - } - } - InstLoc AddrBase; - if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) { - AddrBase = getOp1(AI); - } else { - AddrBase = AI; - } - regClearInst(RI, AddrBase); -} - // in 64-bit build, this returns a completely bizarre address sometimes! -static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, - unsigned OpNum, unsigned Size, X64Reg* dest, - bool Profiled, - unsigned ProfileOffset = 0) { +static std::pair regBuildMemAddress(RegInfo& RI, InstLoc I, + InstLoc AI, unsigned OpNum, unsigned Size, X64Reg* dest) { if (isImm(*AI)) { - unsigned addr = RI.Build->GetImmValue(AI); + unsigned addr = RI.Build->GetImmValue(AI); if (Memory::IsRAMAddress(addr)) { if (dest) *dest = regFindFreeReg(RI); -#ifdef _M_IX86 - // 32-bit - if (Profiled) - return M((void*)((u8*)Memory::base + (addr & Memory::MEMVIEW32_MASK))); - return M((void*)addr); -#else - // 64-bit - if (Profiled) { - RI.Jit->LEA(32, EAX, M((void*)(u64)addr)); - return MComplex(RBX, EAX, SCALE_1, 0); - } - return M((void*)(u64)addr); -#endif + return std::make_pair(Imm32(addr), 0); } } unsigned offset; @@ -513,86 +472,18 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, baseReg = regEnsureInReg(RI, AddrBase); } - if (Profiled) { - // (Profiled mode isn't the default, at least for the moment) -#ifdef _M_IX86 - return MDisp(baseReg, (u32)Memory::base + offset + ProfileOffset); -#else - RI.Jit->LEA(32, EAX, MDisp(baseReg, offset)); - return MComplex(RBX, EAX, SCALE_1, 0); -#endif - } - return MDisp(baseReg, offset); + return std::make_pair(R(baseReg), offset); } static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) { - if (RI.UseProfile) { - unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++]; - if (!(curLoad & 0x0C000000)) { - X64Reg reg; - OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1, - Size, ®, true, - -(curLoad & 0xC0000000)); - RI.Jit->MOVZX(32, Size, reg, addr); - RI.Jit->BSWAP(Size, reg); - if (regReadUse(RI, I)) - RI.regs[reg] = I; - return; - } - } X64Reg reg; - OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1, Size, ®, false); - RI.Jit->LEA(32, ECX, addr); - if (RI.MakeProfile) { - RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX)); - } - u32 mem_mask = 0; + auto info = regBuildMemAddress(RI, I, getOp1(I), 1, Size, ®); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU || SConfig::GetInstance().m_LocalCoreStartupParameter.bTLBHack) - mem_mask = 0x20000000; - - RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000 | mem_mask)); - FixupBranch argh = RI.Jit->J_CC(CC_Z); - - // Slow safe read using Memory::Read_Ux routines -#ifdef _M_IX86 // we don't allocate EAX on x64 so no reason to save it. - if (reg != EAX) { - RI.Jit->PUSH(32, R(EAX)); - } -#endif - switch (Size) - { - case 32: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), ECX); break; - case 16: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), ECX); break; - case 8: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), ECX); break; - } - if (reg != EAX) { - RI.Jit->MOV(32, R(reg), R(EAX)); -#ifdef _M_IX86 - RI.Jit->POP(32, R(EAX)); -#endif - } - FixupBranch arg2 = RI.Jit->J(); - RI.Jit->SetJumpTarget(argh); - RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false); - RI.Jit->SetJumpTarget(arg2); + RI.Jit->SafeLoadToReg(reg, info.first, Size, info.second, regsInUse(RI), false); if (regReadUse(RI, I)) RI.regs[reg] = I; } -static OpArg regSwappedImmForConst(RegInfo& RI, InstLoc I, unsigned Size) { - unsigned imm = RI.Build->GetImmValue(I); - if (Size == 32) { - imm = Common::swap32(imm); - return Imm32(imm); - } else if (Size == 16) { - imm = Common::swap16(imm); - return Imm16(imm); - } else { - return Imm8(imm); - } -} - static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) { unsigned imm = RI.Build->GetImmValue(I); if (Size == 32) { @@ -605,52 +496,17 @@ static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) { } static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) { - if (RI.UseProfile) { - unsigned curStore = ProfiledLoads[RI.numProfiledLoads++]; - if (!(curStore & 0x0C000000)) { - OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2, - Size, 0, true, - -(curStore & 0xC0000000)); - if (isImm(*getOp1(I))) { - RI.Jit->MOV(Size, addr, regSwappedImmForConst(RI, getOp1(I), Size)); - } else { - RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - RI.Jit->BSWAP(Size, ECX); - RI.Jit->MOV(Size, addr, R(ECX)); - } - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - return; - } else if ((curStore & 0xFFFFF000) == 0xCC008000) { - regSpill(RI, EAX); - if (isImm(*getOp1(I))) { - RI.Jit->MOV(Size, R(ECX), regSwappedImmForConst(RI, getOp1(I), Size)); - } else { - RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - RI.Jit->BSWAP(Size, ECX); - } - RI.Jit->MOV(32, R(EAX), M(&GPFifo::m_gatherPipeCount)); - RI.Jit->MOV(Size, MDisp(EAX, (u32)(u64)GPFifo::m_gatherPipe), R(ECX)); - RI.Jit->ADD(32, R(EAX), Imm8(Size >> 3)); - RI.Jit->MOV(32, M(&GPFifo::m_gatherPipeCount), R(EAX)); - RI.Jit->js.fifoBytesThisBlock += Size >> 3; - if (RI.IInfo[I - RI.FirstI] & 4) - regClearInst(RI, getOp1(I)); - regClearDeadMemAddress(RI, I, getOp2(I), 2); - return; - } - } - OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0, false); - RI.Jit->LEA(32, ECX, addr); + auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0); + if (info.first.IsImm()) + RI.Jit->MOV(32, R(ECX), info.first); + else + RI.Jit->LEA(32, ECX, MDisp(info.first.GetSimpleReg(), info.second)); regSpill(RI, EAX); if (isImm(*getOp1(I))) { RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size)); } else { RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I))); } - if (RI.MakeProfile) { - RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX)); - } RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI)); if (RI.IInfo[I - RI.FirstI] & 4) regClearInst(RI, getOp1(I)); @@ -704,18 +560,6 @@ static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) { } static void regWriteExit(RegInfo& RI, InstLoc dest) { - if (RI.MakeProfile) { - if (isImm(*dest)) { - RI.Jit->MOV(32, M(&PC), Imm32(RI.Build->GetImmValue(dest))); - } else { - RI.Jit->MOV(32, R(EAX), regLocForInst(RI, dest)); - RI.Jit->MOV(32, M(&PC), R(EAX)); - } - RI.Jit->Cleanup(); - RI.Jit->SUB(32, M(&CoreTiming::downcount), Imm32(RI.Jit->js.downcountAmount)); - RI.Jit->JMP(((JitIL *)jit)->asm_routines.doReJit, true); - return; - } if (isImm(*dest)) { RI.Jit->WriteExit(RI.Build->GetImmValue(dest), RI.exitNumber++); } else { @@ -729,12 +573,10 @@ static bool checkIsSNAN() { return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]); } -static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool MakeProfile) { +static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit) { //printf("Writing block: %x\n", js.blockStart); RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts()); RI.Build = ibuild; - RI.UseProfile = UseProfile; - RI.MakeProfile = MakeProfile; // Pass to compute liveness ibuild->StartBackPass(); for (unsigned int index = (unsigned int)RI.IInfo.size() - 1; index != -1U; --index) { @@ -1997,22 +1839,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak } } - //if (!RI.MakeProfile && RI.numSpills) - // printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills); - Jit->WriteExit(jit->js.curBlock->exitAddress[0], 0); Jit->UD2(); } void JitIL::WriteCode() { - DoWriteCode(&ibuild, this, false, SConfig::GetInstance().m_LocalCoreStartupParameter.bJITProfiledReJIT); -} - -void ProfiledReJit() { - JitIL *jitil = (JitIL *)jit; - jitil->SetCodePtr(jitil->js.rewriteStart); - DoWriteCode(&jitil->ibuild, jitil, true, false); - jitil->js.curBlock->codeSize = (int)(jitil->GetCodePtr() - jitil->js.rewriteStart); - jitil->GetBlockCache()->FinalizeBlock(jitil->js.curBlock->blockNum, jitil->jo.enableBlocklink, - jitil->js.curBlock->normalEntry); + DoWriteCode(&ibuild, this); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index 4b04415add..acd8baa002 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -7,7 +7,6 @@ #include "Common.h" #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "../../HLE/HLE.h" #include "../../Core.h" #include "../../PatchEngine.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h index 30371311a3..770b26120c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.h @@ -221,6 +221,4 @@ public: void Jit(u32 em_address); -void ProfiledReJit(); - #endif // _JITIL_H diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp index 9750a0e690..57c5212b33 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.cpp @@ -13,7 +13,6 @@ #include "CPUDetect.h" #include "x64ABI.h" -#include "Thunk.h" #include "../../HW/GPFifo.h" #include "../../Core.h" @@ -233,13 +232,6 @@ void JitILAsmRoutineManager::GenerateCommon() fifoDirectWriteXmm64 = AlignCode4(); GenFifoXmm64Write(); - doReJit = AlignCode4(); - ABI_AlignStack(0); - CALL(reinterpret_cast(&ProfiledReJit)); - ABI_RestoreStack(0); - SUB(32, M(&CoreTiming::downcount), Imm8(0)); - JMP(dispatcher, true); - GenQuantizedLoads(); GenQuantizedStores(); GenQuantizedSingleStores(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h index 8222e897c3..cf4dd6e155 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitILAsm.h @@ -38,8 +38,6 @@ public: void Shutdown() { FreeCodeSpace(); } - - const u8 *doReJit; }; extern JitILAsmRoutineManager jitil_asm_routines; diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp index d0ea86225d..55bed551a8 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "Common.h" -#include "Thunk.h" #include "../../ConfigManager.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp index 98b6726227..b8f561e05f 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStore.cpp @@ -6,7 +6,6 @@ // Should give a very noticable speed boost to paired single heavy code. #include "Common.h" -#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp index 33b81623c1..e371fa6a39 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_LoadStorePaired.cpp @@ -4,7 +4,6 @@ #include "Common.h" -#include "Thunk.h" #include "../PowerPC.h" #include "../../Core.h" #include "../../HW/GPFifo.h" diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp index cb3ab91b32..38f55db982 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp @@ -11,7 +11,6 @@ #include "../PPCTables.h" #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "JitIL.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp index b6bde154af..dc24366df7 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp @@ -15,7 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp index cdf1bd89af..e27bb90e69 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp @@ -16,7 +16,6 @@ // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp index 3b57351568..0df1cd965e 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Integer.cpp @@ -15,7 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp index f5296a17d3..1c5c55a369 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -16,7 +16,6 @@ // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp index c8a773e2c9..c2f2f626ac 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp @@ -16,7 +16,6 @@ // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp index ab90f41655..d694fbd79b 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 // Refer to the license.txt file included. #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp index 185af91a9d..744fbc1940 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp @@ -15,7 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index 3dcf5fe099..13f90fb684 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -15,7 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" -#include "Thunk.h" #include "../../Core.h" #include "../PowerPC.h" diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp index da210e2606..cb763db179 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "x64ABI.h" -#include "Thunk.h" #include "CPUDetect.h" #include "x64Emitter.h" @@ -21,6 +20,9 @@ #include "JitAsmCommon.h" #include "JitBase.h" +#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLEE_SAVED & ~((1 << RAX) | (1 << RCX) | (1 << RDX) | \ + (1 << XMM0) | (1 << XMM1))) + using namespace Gen; static int temp32; @@ -142,14 +144,10 @@ static const float GC_ALIGNED16(m_one[]) = {1.0f, 0.0f, 0.0f, 0.0f}; // I don't know whether the overflow actually happens in any games // but it potentially can cause problems, so we need some clamping -#ifdef _M_X64 -// TODO(ector): Improve 64-bit version -static void WriteDual32(u64 value, u32 address) +static void WriteDual32(u32 address) { - Memory::Write_U32((u32)(value >> 32), address); - Memory::Write_U32((u32)value, address + 4); + Memory::Write_U64(*(u64 *) psTemp, address); } -#endif // See comment in header for in/outs. void CommonAsmRoutines::GenQuantizedStores() { @@ -162,18 +160,20 @@ void CommonAsmRoutines::GenQuantizedStores() { MOVQ_xmm(M(&psTemp[0]), XMM0); MOV(64, R(RAX), M(&psTemp[0])); TEST(32, R(ECX), Imm32(0x0C000000)); - FixupBranch too_complex = J_CC(CC_NZ); + FixupBranch too_complex = J_CC(CC_NZ, true); BSWAP(64, RAX); MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX)); - FixupBranch skip_complex = J(); + FixupBranch skip_complex = J(true); SetJumpTarget(too_complex); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX, /* noProlog = */ true); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); + ABI_CallFunctionR((void *)&WriteDual32, RCX); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); SetJumpTarget(skip_complex); RET(); #else MOVQ_xmm(M(&psTemp[0]), XMM0); TEST(32, R(ECX), Imm32(0x0C000000)); - FixupBranch argh = J_CC(CC_NZ); + FixupBranch argh = J_CC(CC_NZ, true); MOV(32, R(EAX), M(&psTemp)); BSWAP(32, EAX); AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK)); @@ -181,13 +181,11 @@ void CommonAsmRoutines::GenQuantizedStores() { MOV(32, R(EAX), M(((char*)&psTemp) + 4)); BSWAP(32, EAX); MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX)); - FixupBranch arg2 = J(); + FixupBranch arg2 = J(true); SetJumpTarget(argh); - MOV(32, R(EAX), M(((char*)&psTemp))); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true); - MOV(32, R(EAX), M(((char*)&psTemp)+4)); - ADD(32, R(ECX), Imm32(4)); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); + ABI_CallFunctionR((void *)&WriteDual32, ECX); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); SetJumpTarget(arg2); RET(); #endif @@ -206,8 +204,8 @@ void CommonAsmRoutines::GenQuantizedStores() { PACKSSDW(XMM0, R(XMM0)); PACKUSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); - + SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + RET(); const u8* storePairedS8 = AlignCode4(); @@ -225,8 +223,8 @@ void CommonAsmRoutines::GenQuantizedStores() { PACKSSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); - + SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + RET(); const u8* storePairedU16 = AlignCode4(); @@ -251,8 +249,8 @@ void CommonAsmRoutines::GenQuantizedStores() { MOV(16, R(AX), M((char*)psTemp + 4)); BSWAP(32, EAX); - SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); - + SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + RET(); const u8* storePairedS16 = AlignCode4(); @@ -271,8 +269,8 @@ void CommonAsmRoutines::GenQuantizedStores() { MOVD_xmm(R(EAX), XMM0); BSWAP(32, EAX); ROL(32, R(EAX), Imm8(16)); - SafeWriteRegToReg(EAX, ECX, 32, 0, 0, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); - + SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_SWAP | SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + RET(); pairedStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); @@ -295,7 +293,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { // Easy! const u8* storeSingleFloat = AlignCode4(); - SafeWriteFloatToReg(XMM0, ECX, 0, SAFE_WRITE_NO_FASTMEM); + SafeWriteFloatToReg(XMM0, ECX, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_FASTMEM); RET(); /* if (cpu_info.bSSSE3) { @@ -318,7 +316,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_255)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleS8 = AlignCode4(); @@ -328,7 +326,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, M((void *)&m_m128)); MINSS(XMM0, M((void *)&m_127)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleU16 = AlignCode4(); // Used by MKWii @@ -339,7 +337,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_65535)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); const u8* storeSingleS16 = AlignCode4(); @@ -349,7 +347,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, M((void *)&m_m32768)); MINSS(XMM0, M((void *)&m_32767)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, 0, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); + SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_WRITE_NO_PROLOG | SAFE_WRITE_NO_FASTMEM); RET(); singleStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h index 58d7c5fe6e..67398f6f6b 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h @@ -6,7 +6,6 @@ #define _JITASMCOMMON_H #include "../JitCommon/Jit_Util.h" -#include "Thunk.h" class CommonAsmRoutinesBase { public: @@ -65,9 +64,6 @@ public: void GenFifoXmm64Write(); void GenFifoFloatWrite(); -private: - ThunkManager thunks; - }; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp index da7f7c1c27..a325ca1884 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.cpp @@ -13,7 +13,6 @@ #include "x64Emitter.h" #include "x64ABI.h" -#include "Thunk.h" #include "x64Analyzer.h" #include "StringUtil.h" @@ -76,7 +75,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re if (info.displacement) { ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); } - PushRegistersAndAlignStack(registersInUse); + ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { case 4: @@ -96,7 +95,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re MOV(32, R(dataReg), R(EAX)); } - PopRegistersAndAlignStack(registersInUse); + ABI_PopRegistersAndAdjustStack(registersInUse, true); RET(); #endif return trampoline; @@ -137,7 +136,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); } - PushRegistersAndAlignStack(registersInUse); + ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { case 8: @@ -154,7 +153,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r break; } - PopRegistersAndAlignStack(registersInUse); + ABI_PopRegistersAndAdjustStack(registersInUse, true); RET(); #endif @@ -177,15 +176,23 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) InstructionInfo info; if (!DisassembleMov(codePtr, &info)) { BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress); + return 0; } if (info.otherReg != RBX) + { PanicAlert("BackPatch : Base reg not RBX." "\n\nAttempted to access %08x.", emAddress); + return 0; + } auto it = registersInUseAtLoc.find(codePtr); if (it == registersInUseAtLoc.end()) + { PanicAlert("BackPatch: no register use entry for address %p", codePtr); + return 0; + } + u32 registersInUse = it->second; if (!info.isMemoryWrite) @@ -235,7 +242,6 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) emitter.NOP(codePtr + info.instructionSize - emitter.GetCodePtr()); return start; } - return 0; #else return 0; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h index 59310b6ecf..3dc48dd05d 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBackpatch.h @@ -8,7 +8,6 @@ #include "Common.h" #include "x64Emitter.h" #include "x64Analyzer.h" -#include "Thunk.h" // meh. #if defined(_WIN32) @@ -234,8 +233,6 @@ public: const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse); -private: - ThunkManager thunks; }; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index 79833e883c..69cfdc8bd3 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include "Common.h" -#include "Thunk.h" #include "CPUDetect.h" #include "../PowerPC.h" @@ -58,34 +57,46 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i #endif } -u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend) +u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend) { u8 *result; #ifdef _M_X64 if (opAddress.IsSimpleReg()) { + // Deal with potential wraparound. (This is just a heuristic, and it would + // be more correct to actually mirror the first page at the end, but the + // only case where it probably actually matters is JitIL turning adds into + // offsets with the wrong sign, so whatever. Since the original code + // *could* try to wrap an address around, however, this is the correct + // place to address the issue.) + if ((u32) offset >= 0x1000) { + LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset)); + opAddress = R(reg_value); + offset = 0; + } + result = GetWritableCodePtr(); - MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset)); + MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset)); } else { - MOV(32, R(EAX), opAddress); + MOV(32, R(reg_value), opAddress); result = GetWritableCodePtr(); - MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset)); + MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset)); } #else if (opAddress.IsImm()) { result = GetWritableCodePtr(); - MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK))); + MOVZX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK))); } else { - if (!opAddress.IsSimpleReg(EAX)) - MOV(32, R(EAX), opAddress); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + if (!opAddress.IsSimpleReg(reg_value)) + MOV(32, R(reg_value), opAddress); + AND(32, R(reg_value), Imm32(Memory::MEMVIEW32_MASK)); result = GetWritableCodePtr(); - MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base + offset)); + MOVZX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset)); } #endif @@ -95,26 +106,27 @@ u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, if (accessSize == 32) { - BSWAP(32, EAX); + BSWAP(32, reg_value); } else if (accessSize == 16) { - BSWAP(32, EAX); + BSWAP(32, reg_value); if (signExtend) - SAR(32, R(EAX), Imm8(16)); + SAR(32, R(reg_value), Imm8(16)); else - SHR(32, R(EAX), Imm8(16)); + SHR(32, R(reg_value), Imm8(16)); } else if (signExtend) { // TODO: bake 8-bit into the original load. - MOVSX(32, accessSize, EAX, R(EAX)); + MOVSX(32, accessSize, reg_value, R(reg_value)); } return result; } -void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend) +void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend) { + registersInUse &= ~(1 << RAX | 1 << reg_value); #if defined(_M_X64) #ifdef ENABLE_MEM_CHECK if (!Core::g_CoreStartupParameter.bMMU && !Core::g_CoreStartupParameter.bEnableDebugging && Core::g_CoreStartupParameter.bFastmem) @@ -122,10 +134,8 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem) #endif { - u8 *mov = UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + u8 *mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend); - // XXX: are these dead anyway? - registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX)); registersInUseAtLoc[mov] = registersInUse; } else @@ -149,20 +159,26 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s u32 address = (u32)opAddress.offset + offset; if ((address & mem_mask) == 0) { - UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend); } else { + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), address); break; - case 16: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), address); break; - case 8: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), address); break; + case 32: ABI_CallFunctionC((void *)&Memory::Read_U32, address); break; + case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break; + case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. - MOVSX(32, accessSize, EAX, R(EAX)); + MOVSX(32, accessSize, reg_value, R(EAX)); + } + else if (reg_value != EAX) + { + MOVZX(32, accessSize, reg_value, R(EAX)); } } } @@ -173,45 +189,57 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s MOV(32, R(EAX), opAddress); ADD(32, R(EAX), Imm32(offset)); TEST(32, R(EAX), Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z); + FixupBranch fast = J_CC(CC_Z, true); + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), EAX); break; - case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), EAX); break; - case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), EAX); break; + case 32: ABI_CallFunctionR((void *)&Memory::Read_U32, EAX); break; + case 16: ABI_CallFunctionR((void *)&Memory::Read_U16_ZX, EAX); break; + case 8: ABI_CallFunctionR((void *)&Memory::Read_U8_ZX, EAX); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. - MOVSX(32, accessSize, EAX, R(EAX)); + MOVSX(32, accessSize, reg_value, R(EAX)); + } + else if (reg_value != EAX) + { + MOVZX(32, accessSize, reg_value, R(EAX)); } FixupBranch exit = J(); SetJumpTarget(fast); - UnsafeLoadToEAX(R(EAX), accessSize, 0, signExtend); + UnsafeLoadToReg(reg_value, R(EAX), accessSize, 0, signExtend); SetJumpTarget(exit); } else { TEST(32, opAddress, Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z); + FixupBranch fast = J_CC(CC_Z, true); + ABI_PushRegistersAndAdjustStack(registersInUse, false); switch (accessSize) { - case 32: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), opAddress); break; - case 16: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), opAddress); break; - case 8: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), opAddress); break; + case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, opAddress); break; + case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, opAddress); break; + case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, opAddress); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, false); if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. - MOVSX(32, accessSize, EAX, R(EAX)); + MOVSX(32, accessSize, reg_value, R(EAX)); + } + else if (reg_value != EAX) + { + MOVZX(32, accessSize, reg_value, R(EAX)); } FixupBranch exit = J(); SetJumpTarget(fast); - UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend); + UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend); SetJumpTarget(exit); } } @@ -239,6 +267,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc // Destroys both arg registers void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags) { + registersInUse &= ~(1 << RAX); #if defined(_M_X64) if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem && @@ -255,8 +284,6 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce NOP(1); } - // XXX: are these dead anyway? - registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX)); registersInUseAtLoc[mov] = registersInUse; return; } @@ -280,16 +307,18 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce #endif TEST(32, R(reg_addr), Imm32(mem_mask)); - FixupBranch fast = J_CC(CC_Z); + FixupBranch fast = J_CC(CC_Z, true); MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write bool noProlog = flags & SAFE_WRITE_NO_PROLOG; bool swap = !(flags & SAFE_WRITE_NO_SWAP); + ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); switch (accessSize) { - case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break; - case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr, noProlog); break; - case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr, noProlog); break; + case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break; + case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break; + case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break; } + ABI_PopRegistersAndAdjustStack(registersInUse, noProlog); FixupBranch exit = J(); SetJumpTarget(fast); UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); @@ -315,7 +344,9 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 re MOV(32, R(EAX), M(&float_buffer)); BSWAP(32, EAX); MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write - ABI_CallFunctionRR(thunks.ProtectFunction(((void *)&Memory::Write_U32), 2), EAX, reg_addr); + ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr); + ABI_PopRegistersAndAdjustStack(registersInUse, false); FixupBranch arg2 = J(); SetJumpTarget(argh); PSHUFB(xmm_value, M((void *)pbswapShuffle1x4)); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h index 0fd5db0380..63a0fed0a5 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h @@ -6,7 +6,6 @@ #define _JITUTIL_H #include "x64Emitter.h" -#include "Thunk.h" #include // Like XCodeBlock but has some utilities for memory access. @@ -16,8 +15,8 @@ public: void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset); // these return the address of the MOV, for backpatching u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true); - u8 *UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend); - void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend); + u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend); + void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend); enum SafeWriteFlags { SAFE_WRITE_NO_SWAP = 1, @@ -38,7 +37,6 @@ public: void ForceSinglePrecisionS(Gen::X64Reg xmm); void ForceSinglePrecisionP(Gen::X64Reg xmm); protected: - ThunkManager thunks; std::unordered_map registersInUseAtLoc; }; diff --git a/Source/Core/Core/Src/x64MemTools.cpp b/Source/Core/Core/Src/x64MemTools.cpp index a0c79aaed5..62ca755385 100644 --- a/Source/Core/Core/Src/x64MemTools.cpp +++ b/Source/Core/Core/Src/x64MemTools.cpp @@ -65,6 +65,11 @@ bool DoFault(u64 bad_address, SContext *ctx) { ctx->CTX_PC = (u64) new_pc; } + else + { + // there was an error, give the debugger a chance + return false; + } return true; }