From 558dee84ca3f242a8da07758668f1f7fff778ac9 Mon Sep 17 00:00:00 2001
From: comex
Date: Mon, 15 Sep 2014 22:37:31 -0400
Subject: [PATCH 1/6] Wrap some function calls in ABI_Push|PopRegistersAndAdjustStack(0, 0);

These calls are made outside of JIT blocks, and thus previously did not
need any protection - register use is taken into account and the outer
dispatcher stack frame is sufficient. However, if data is to be stored
on the stack, these calls must reserve stack shadow space on Windows to
avoid clobbering it.
---
 Source/Core/Core/PowerPC/Jit64/Jit.cpp    | 18 ++++++++++++++++++
 Source/Core/Core/PowerPC/Jit64/JitAsm.cpp |  5 +++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 720375613c..585b7e136f 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -174,7 +174,9 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
     MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
   }
   Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
+  ABI_PushRegistersAndAdjustStack(0, 0);
   ABI_CallFunctionC((void*)instr, inst.hex);
+  ABI_PopRegistersAndAdjustStack(0, 0);
 }

 void Jit64::unknown_instruction(UGeckoInstruction inst)
@@ -191,7 +193,9 @@ void Jit64::HLEFunction(UGeckoInstruction _inst)
 {
   gpr.Flush();
   fpr.Flush();
+  ABI_PushRegistersAndAdjustStack(0, 0);
   ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
+  ABI_PopRegistersAndAdjustStack(0, 0);
 }

 void Jit64::DoNothing(UGeckoInstruction _inst)
@@ -227,7 +231,9 @@ void Jit64::Cleanup()
 {
   if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
   {
+    ABI_PushRegistersAndAdjustStack(0, 0);
     ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
+    ABI_PopRegistersAndAdjustStack(0, 0);
   }

   // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
@@ -278,7 +284,9 @@ void Jit64::WriteRfiExitDestInRSCRATCH()
   MOV(32, PPCSTATE(pc), R(RSCRATCH));
   MOV(32, PPCSTATE(npc), R(RSCRATCH));
   Cleanup();
+  ABI_PushRegistersAndAdjustStack(0, 0);
   ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
+  ABI_PopRegistersAndAdjustStack(0, 0);
   SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
   JMP(asm_routines.dispatcher, true);
 }
@@ -288,7 +296,9 @@ void Jit64::WriteExceptionExit()
   Cleanup();
   MOV(32, R(RSCRATCH), PPCSTATE(pc));
   MOV(32, PPCSTATE(npc), R(RSCRATCH));
+  ABI_PushRegistersAndAdjustStack(0, 0);
   ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
+  ABI_PopRegistersAndAdjustStack(0, 0);
   SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
   JMP(asm_routines.dispatcher, true);
 }
@@ -298,7 +308,9 @@ void Jit64::WriteExternalExceptionExit()
   Cleanup();
   MOV(32, R(RSCRATCH), PPCSTATE(pc));
   MOV(32, PPCSTATE(npc), R(RSCRATCH));
+  ABI_PushRegistersAndAdjustStack(0, 0);
   ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
+  ABI_PopRegistersAndAdjustStack(0, 0);
   SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
   JMP(asm_routines.dispatcher, true);
 }
@@ -395,7 +407,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
   b->normalEntry = normalEntry;

   if (ImHereDebug)
+  {
+    ABI_PushRegistersAndAdjustStack(0, 0);
     ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
+    ABI_PopRegistersAndAdjustStack(0, 0);
+  }

   // Conditionally add profiling code.
   if (Profiler::g_ProfileBlocks)
@@ -548,7 +564,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
       fpr.Flush();

       MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
+      ABI_PushRegistersAndAdjustStack(0, 0);
       ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
+      ABI_PopRegistersAndAdjustStack(0, 0);
       TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
       FixupBranch noBreakpoint = J_CC(CC_Z);

diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
index a362768708..dfef055459 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -106,8 +106,9 @@ void Jit64AsmRoutineManager::Generate()
       SetJumpTarget(notfound);

       //Ok, no block, let's jit
-      MOV(32, R(ABI_PARAM1), PPCSTATE(pc));
-      CALL((void *)&Jit);
+      ABI_PushRegistersAndAdjustStack(0, 0);
+      ABI_CallFunctionA((void *)&Jit, PPCSTATE(pc));
+      ABI_PopRegistersAndAdjustStack(0, 0);
       JMP(dispatcherNoCheck); // no point in special casing this

From b597ec3e081a289d9ac782586617a876535183d6 Mon Sep 17 00:00:00 2001
From: comex
Date: Sun, 7 Sep 2014 16:36:25 -0400
Subject: [PATCH 2/6] Opportunistically predict BLR destinations using RET.

When executing a BL-type instruction, push the new LR onto the stack,
then CALL the dispatcher or linked block rather than JMPing to it. When
executing BLR, compare [rsp+8] to LR, and RET if it's right, which it
usually will be unless the thread was switched out. If it's not right,
reset RSP to avoid overflow.

This both saves a trip through the dispatcher and improves branch
prediction. There is a small possibility of stack overflow anyway,
which should be handled... *yawn*

(The emitted BL and BLR sequences are sketched in a short note at the
end of this series.)
---
 Source/Core/Core/PowerPC/Jit64/Jit.cpp        | 92 +++++++++++++++++--
 Source/Core/Core/PowerPC/Jit64/Jit.h          |  8 +-
 Source/Core/Core/PowerPC/Jit64/JitAsm.cpp     | 35 ++++++-
 Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 10 +-
 .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp   |  6 +-
 .../Core/PowerPC/JitCommon/JitAsmCommon.h     |  1 +
 .../Core/Core/PowerPC/JitCommon/JitCache.cpp  |  5 +-
 7 files changed, 135 insertions(+), 22 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 585b7e136f..b3625c6727 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -227,31 +227,55 @@ static void ImHere()
   been_here[PC] = 1;
 }

-void Jit64::Cleanup()
+bool Jit64::Cleanup()
 {
+  bool did_something = false;
+
   if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
   {
     ABI_PushRegistersAndAdjustStack(0, 0);
     ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
     ABI_PopRegistersAndAdjustStack(0, 0);
+    did_something = true;
   }

   // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
   if (MMCR0.Hex || MMCR1.Hex)
+  {
     ABI_CallFunctionCCC((void *)&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);
+    did_something = true;
+  }
+
+  return did_something;
 }

-void Jit64::WriteExit(u32 destination)
+void Jit64::WriteExit(u32 destination, bool bl, u32 after)
 {
+  // BLR optimization has similar consequences to block linking.
+ if (!jo.enableBlocklink) + { + bl = false; + } + Cleanup(); + if (bl) + { + MOV(32, R(RSCRATCH2), Imm32(after)); + PUSH(RSCRATCH2); + } + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); + JustWriteExit(destination, bl, after); +} + +void Jit64::JustWriteExit(u32 destination, bool bl, u32 after) +{ //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; JitBlock::LinkData linkData; linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); linkData.linkStatus = false; // Link opportunity! @@ -259,24 +283,78 @@ void Jit64::WriteExit(u32 destination) if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) { // It exists! Joy of joy! - JMP(blocks.GetBlock(block)->checkedEntry, true); + JitBlock* jb = blocks.GetBlock(block); + const u8* addr = jb->checkedEntry; + linkData.exitPtrs = GetWritableCodePtr(); + if (bl) + CALL(addr); + else + JMP(addr, true); linkData.linkStatus = true; } else { MOV(32, PPCSTATE(pc), Imm32(destination)); - JMP(asm_routines.dispatcher, true); + linkData.exitPtrs = GetWritableCodePtr(); + if (bl) + CALL(asm_routines.dispatcher); + else + JMP(asm_routines.dispatcher, true); } b->linkData.push_back(linkData); + + if (bl) + { + POP(RSCRATCH); + JustWriteExit(after, false, 0); + } } -void Jit64::WriteExitDestInRSCRATCH() +void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after) { + if (!jo.enableBlocklink) + { + bl = false; + } + if (bl) + { + MOV(32, R(RSCRATCH2), Imm32(after)); + PUSH(RSCRATCH2); + } MOV(32, PPCSTATE(pc), R(RSCRATCH)); Cleanup(); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); - JMP(asm_routines.dispatcher, true); + if (bl) + { + CALL(asm_routines.dispatcher); + POP(RSCRATCH); + JustWriteExit(after, false, 0); + } + else + { + JMP(asm_routines.dispatcher, true); + } +} + +void Jit64::WriteBLRExit() +{ + if (!jo.enableBlocklink) + { + WriteExitDestInRSCRATCH(); + return; + } + MOV(32, PPCSTATE(pc), R(RSCRATCH)); + bool disturbed = Cleanup(); + if (disturbed) + MOV(32, R(RSCRATCH), PPCSTATE(pc)); + CMP(64, R(RSCRATCH), MDisp(RSP, 8)); + FixupBranch nope = J_CC(CC_NE); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); + RET(); + SetJumpTarget(nope); + MOV(32, R(RSCRATCH), Imm32(js.downcountAmount)); + JMP(asm_routines.dispatcherMispredictedBLR, true); } void Jit64::WriteRfiExitDestInRSCRATCH() diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index d444f0f834..cface00cb3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -89,13 +89,15 @@ public: // Utilities for use by opcodes - void WriteExit(u32 destination); - void WriteExitDestInRSCRATCH(); + void WriteExit(u32 destination, bool bl = false, u32 after = 0); + void JustWriteExit(u32 destination, bool bl, u32 after); + void WriteExitDestInRSCRATCH(bool bl = false, u32 after = 0); + void WriteBLRExit(); void WriteExceptionExit(); void WriteExternalExceptionExit(); void WriteRfiExitDestInRSCRATCH(); void WriteCallInterpreter(UGeckoInstruction _inst); - void Cleanup(); + bool Cleanup(); void GenerateConstantOverflow(bool overflow); void GenerateConstantOverflow(s64 val); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index dfef055459..dc307540f6 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -9,6 +9,9 @@ using namespace Gen; +// Not PowerPC state. 
Can't put in 'this' because it's out of range... +static void* s_saved_rsp; + // PLAN: no more block numbers - crazy opcodes just contain offset within // dynarec buffer // At this offset - 4, there is an int specifying the block number. @@ -16,7 +19,13 @@ using namespace Gen; void Jit64AsmRoutineManager::Generate() { enterCode = AlignCode16(); - ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + // We need to own the beginning of RSP, so we do an extra stack adjustment + // for the shadow region before calls in this function. This call will + // waste a bit of space for a second shadow, but whatever. + ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, /*frame*/ 16); + // something that can't pass the BLR test + MOV(64, M(&s_saved_rsp), R(RSP)); + MOV(64, MDisp(RSP, 8), Imm32((u32)-1)); // Two statically allocated registers. MOV(64, R(RMEM), Imm64((u64)Memory::base)); @@ -24,8 +33,22 @@ void Jit64AsmRoutineManager::Generate() MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80)); const u8* outerLoop = GetCodePtr(); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); + ABI_PopRegistersAndAdjustStack(0, 0); FixupBranch skipToRealDispatch = J(SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging); //skip the sync and compare first time + dispatcherMispredictedBLR = GetCodePtr(); + + #if 0 // debug mispredicts + MOV(32, R(ABI_PARAM1), MDisp(RSP, 8)); // guessed_pc + ABI_PushRegistersAndAdjustStack(1 << RSCRATCH, 0); + CALL(reinterpret_cast(&ReportMispredict)); + ABI_PopRegistersAndAdjustStack(1 << RSCRATCH, 0); + #endif + + MOV(64, R(RSP), M(&s_saved_rsp)); + + SUB(32, PPCSTATE(downcount), R(RSCRATCH)); dispatcher = GetCodePtr(); // The result of slice decrementation should be in flags if somebody jumped here @@ -36,10 +59,13 @@ void Jit64AsmRoutineManager::Generate() { TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING)); FixupBranch notStepping = J_CC(CC_Z); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); + ABI_PopRegistersAndAdjustStack(0, 0); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); - ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + MOV(64, R(RSP), M(&s_saved_rsp)); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16); RET(); SetJumpTarget(noBreakpoint); SetJumpTarget(notStepping); @@ -120,14 +146,17 @@ void Jit64AsmRoutineManager::Generate() FixupBranch noExtException = J_CC(CC_Z); MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, PPCSTATE(npc), R(RSCRATCH)); + ABI_PushRegistersAndAdjustStack(0, 0); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExternalExceptions)); + ABI_PopRegistersAndAdjustStack(0, 0); SetJumpTarget(noExtException); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); J_CC(CC_Z, outerLoop); //Landing pad for drec space - ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); + MOV(64, R(RSP), M(&s_saved_rsp)); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16); RET(); GenerateCommon(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 8456d56b7c..2508fe1417 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -92,7 +92,7 @@ void Jit64::bx(UGeckoInstruction inst) // make idle loops go faster js.downcountAmount += 8; } - WriteExit(destination); + WriteExit(destination, inst.LK, js.compilerPC + 
4); } // TODO - optimize to hell and beyond @@ -133,7 +133,7 @@ void Jit64::bcx(UGeckoInstruction inst) gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExit(destination); + WriteExit(destination, inst.LK, js.compilerPC + 4); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget( pConditionDontBranch ); @@ -168,7 +168,7 @@ void Jit64::bcctrx(UGeckoInstruction inst) if (inst.LK_3) MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4; AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); - WriteExitDestInRSCRATCH(); + WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4); } else { @@ -187,7 +187,7 @@ void Jit64::bcctrx(UGeckoInstruction inst) gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExitDestInRSCRATCH(); + WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4); // Would really like to continue the block here, but it ends. TODO. SetJumpTarget(b); @@ -235,7 +235,7 @@ void Jit64::bclrx(UGeckoInstruction inst) gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExitDestInRSCRATCH(); + WriteBLRExit(); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget( pConditionDontBranch ); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index dbb6a5fbf1..79a7c5d76d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -312,7 +312,7 @@ void Jit64::DoMergedBranch() destination = SignExt16(js.next_inst.BD << 2); else destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); - WriteExit(destination); + WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx { @@ -320,7 +320,7 @@ void Jit64::DoMergedBranch() MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, R(RSCRATCH), M(&CTR)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); - WriteExitDestInRSCRATCH(); + WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4); } else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx { @@ -328,7 +328,7 @@ void Jit64::DoMergedBranch() AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); if (js.next_inst.LK) MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); - WriteExitDestInRSCRATCH(); + WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4); } else { diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index 2702db95e1..c3f6a69b5c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -17,6 +17,7 @@ public: const u8 *enterCode; + const u8 *dispatcherMispredictedBLR; const u8 *dispatcher; const u8 *dispatcherNoCheck; const u8 *dispatcherPcInRSCRATCH; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index bf1ce35596..d8fc87f449 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -364,7 +364,10 @@ using namespace Gen; void JitBlockCache::WriteLinkBlock(u8* location, const u8* address) { XEmitter emit(location); - emit.JMP(address, true); + if (*location == 0xE8) + emit.CALL(address); + else + emit.JMP(address, true); } void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) From bd4e75e69a3bda520ab76f2e2618cc7eda723cee Mon Sep 17 00:00:00 2001 From: comex Date: Sun, 7 Sep 2014 16:56:02 -0400 Subject: [PATCH 3/6] Shorten the blr stub a 
 bit.

---
 Source/Core/Core/PowerPC/Jit64/Jit.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index b3625c6727..d928d02927 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -349,12 +349,10 @@ void Jit64::WriteBLRExit()
   if (disturbed)
     MOV(32, R(RSCRATCH), PPCSTATE(pc));
   CMP(64, R(RSCRATCH), MDisp(RSP, 8));
-  FixupBranch nope = J_CC(CC_NE);
-  SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
-  RET();
-  SetJumpTarget(nope);
   MOV(32, R(RSCRATCH), Imm32(js.downcountAmount));
-  JMP(asm_routines.dispatcherMispredictedBLR, true);
+  J_CC(CC_NE, asm_routines.dispatcherMispredictedBLR);
+  SUB(32, PPCSTATE(downcount), R(RSCRATCH));
+  RET();
 }

 void Jit64::WriteRfiExitDestInRSCRATCH()

From bd4e75e69a3bda520ab76f2e2618cc7eda723cee Mon Sep 17 00:00:00 2001
From: comex
Date: Sun, 7 Sep 2014 19:10:02 -0400
Subject: [PATCH 4/6] Run exception handlers on an alternate stack on Linux.

*Completely untested.* Someone please test.
---
 Source/Core/Core/ArmMemTools.cpp |  3 +++
 Source/Core/Core/Core.cpp        |  4 ++++
 Source/Core/Core/MemTools.h      |  1 +
 Source/Core/Core/x64MemTools.cpp | 20 ++++++++++++++++++++
 4 files changed, 28 insertions(+)

diff --git a/Source/Core/Core/ArmMemTools.cpp b/Source/Core/Core/ArmMemTools.cpp
index ff7d77e4f4..c9816aba00 100644
--- a/Source/Core/Core/ArmMemTools.cpp
+++ b/Source/Core/Core/ArmMemTools.cpp
@@ -86,4 +86,7 @@ void InstallExceptionHandler()
   sigemptyset(&sa.sa_mask);
   sigaction(SIGSEGV, &sa, nullptr);
 }
+
+void UninstallExceptionHandler() {}
+
 } // namespace

diff --git a/Source/Core/Core/Core.cpp b/Source/Core/Core/Core.cpp
index 3a764a4e63..5b6294d2c7 100644
--- a/Source/Core/Core/Core.cpp
+++ b/Source/Core/Core/Core.cpp
@@ -277,6 +277,10 @@ static void CpuThread()
   if (!_CoreParameter.bCPUThread)
     g_video_backend->Video_Cleanup();

+  #if _M_X86_64 || _M_ARM_32
+  EMM::UninstallExceptionHandler();
+  #endif
+
   return;
 }

diff --git a/Source/Core/Core/MemTools.h b/Source/Core/Core/MemTools.h
index 276af3d887..fcc671b799 100644
--- a/Source/Core/Core/MemTools.h
+++ b/Source/Core/Core/MemTools.h
@@ -11,4 +11,5 @@ namespace EMM
 {
   typedef u32 EAddr;
   void InstallExceptionHandler();
+  void UninstallExceptionHandler();
 }

diff --git a/Source/Core/Core/x64MemTools.cpp b/Source/Core/Core/x64MemTools.cpp
index de298363df..057d3a83f0 100644
--- a/Source/Core/Core/x64MemTools.cpp
+++ b/Source/Core/Core/x64MemTools.cpp
@@ -125,6 +125,8 @@ void InstallExceptionHandler()
   handlerInstalled = true;
 }

+void UninstallExceptionHandler() {}
+
 #elif defined(__APPLE__)

 void CheckKR(const char* name, kern_return_t kr)
@@ -243,6 +245,8 @@ void InstallExceptionHandler()
   CheckKR("mach_port_request_notification", mach_port_request_notification(mach_task_self(), port, MACH_NOTIFY_NO_SENDERS, 0, port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &previous));
 }

+void UninstallExceptionHandler() {}
+
 #elif defined(_POSIX_VERSION)

 static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)
@@ -273,6 +277,12 @@ static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context)

 void InstallExceptionHandler()
 {
+  stack_t signal_stack;
+  signal_stack.ss_sp = malloc(SIGSTKSZ);
+  signal_stack.ss_size = SIGSTKSZ;
+  signal_stack.ss_flags = 0;
+  if (sigaltstack(&signal_stack, nullptr))
+    PanicAlert("sigaltstack failed");
   struct sigaction sa;
   sa.sa_handler = nullptr;
   sa.sa_sigaction = &sigsegv_handler;
@@ -281,6 +291,16 @@ void InstallExceptionHandler()
sigaction(SIGSEGV, &sa, nullptr); } +void UninstallExceptionHandler() +{ + stack_t signal_stack, old_stack; + signal_stack.ss_flags = SS_DISABLE; + if (!sigaltstack(&signal_stack, &old_stack) && + !(old_stack.ss_flags & SS_DISABLE)) + { + free(old_stack.ss_sp); + } +} #else #error Unsupported x86_64 platform! Report this if you support sigaction From 755bd2c445488709a9c2b7d283ca5e6e56fdc56e Mon Sep 17 00:00:00 2001 From: comex Date: Thu, 11 Sep 2014 00:24:22 -0400 Subject: [PATCH 5/6] Reorganize backpatching a bit. Untested on ARM. Rather than *MemTools.cpp checking whether the address is in the emulated range itself (which, as of the next commit, doesn't cover every kind of access the JIT might want to intercept) and doing PC replacement, they just pass the access address and context to jit->HandleFault, which does the rest itself. Because SContext is now in JitInterface, I wanted JitBackpatch.h (which defines it) to be lightweight, so I moved TrampolineCache and associated x64{Analyzer,Emitter} dependencies into its own file. I hate adding new files in three places, two of which are MSVC... While I'm at it, edit a misleading comment. --- Source/Core/Core/ArmMemTools.cpp | 33 +--- Source/Core/Core/CMakeLists.txt | 5 +- Source/Core/Core/Core.vcxproj | 4 +- Source/Core/Core/Core.vcxproj.filters | 8 +- Source/Core/Core/PowerPC/Jit64IL/JitIL.h | 5 + Source/Core/Core/PowerPC/JitArm32/Jit.h | 6 +- .../PowerPC/JitArm32/JitArm_BackPatch.cpp | 19 +- .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 175 +++--------------- .../Core/PowerPC/JitCommon/JitBackpatch.h | 17 +- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 11 +- .../PowerPC/JitCommon/TrampolineCache.cpp | 156 ++++++++++++++++ .../Core/PowerPC/JitCommon/TrampolineCache.h | 22 +++ Source/Core/Core/PowerPC/JitInterface.cpp | 8 +- Source/Core/Core/PowerPC/JitInterface.h | 4 +- Source/Core/Core/x64MemTools.cpp | 46 +---- 15 files changed, 258 insertions(+), 261 deletions(-) create mode 100644 Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp create mode 100644 Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h diff --git a/Source/Core/Core/ArmMemTools.cpp b/Source/Core/Core/ArmMemTools.cpp index c9816aba00..8b166580a8 100644 --- a/Source/Core/Core/ArmMemTools.cpp +++ b/Source/Core/Core/ArmMemTools.cpp @@ -32,9 +32,9 @@ typedef struct ucontext { } ucontext_t; #endif -void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) +static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context) { - if (signal != SIGSEGV) + if (sig != SIGSEGV) { // We are not interested in other signals - handle it as usual. return; @@ -47,33 +47,18 @@ void sigsegv_handler(int signal, siginfo_t *info, void *raw_context) return; } - // Get all the information we can out of the context. mcontext_t *ctx = &context->uc_mcontext; - void *fault_memory_ptr = (void*)ctx->arm_r10; - u8 *fault_instruction_ptr = (u8 *)ctx->arm_pc; + // comex says hello, and is most curious whether this is arm_r10 for a + // reason as opposed to si_addr like the x64MemTools.cpp version. Is there + // even a need for this file to be architecture specific? + uintptr_t fault_memory_ptr = (uintptr_t)ctx->arm_r10; - if (!JitInterface::IsInCodeSpace(fault_instruction_ptr)) + if (!JitInterface::HandleFault(fault_memory_ptr, ctx)) { - // Let's not prevent debugging. - return; - } - - u64 bad_address = (u64)fault_memory_ptr; - u64 memspace_bottom = (u64)Memory::base; - if (bad_address < memspace_bottom) - { - PanicAlertT("Exception handler - access below memory space. 
%08llx%08llx", - bad_address >> 32, bad_address); - } - - u32 em_address = (u32)(bad_address - memspace_bottom); - - const u8 *new_rip = jit->BackPatch(fault_instruction_ptr, em_address, ctx); - if (new_rip) - { - ctx->arm_pc = (u32) new_rip; + // retry and crash + signal(SIGSEGV, SIG_DFL); } } diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 78172d5cb0..39800a448e 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -195,9 +195,10 @@ if(_M_X86) PowerPC/Jit64/Jit_Paired.cpp PowerPC/Jit64/JitRegCache.cpp PowerPC/Jit64/Jit_SystemRegisters.cpp - PowerPC/JitCommon/JitBackpatch.cpp PowerPC/JitCommon/JitAsmCommon.cpp - PowerPC/JitCommon/Jit_Util.cpp) + PowerPC/JitCommon/JitBackpatch.cpp + PowerPC/JitCommon/Jit_Util.cpp + PowerPC/JitCommon/TrampolineCache.cpp) elseif(_M_ARM_32) set(SRCS ${SRCS} ArmMemTools.cpp diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index 7ead1e4172..b46357fd3d 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -229,6 +229,7 @@ + @@ -406,6 +407,7 @@ + @@ -464,4 +466,4 @@ - \ No newline at end of file + diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters index 39e6aec8f4..faeb9bcd24 100644 --- a/Source/Core/Core/Core.vcxproj.filters +++ b/Source/Core/Core/Core.vcxproj.filters @@ -640,6 +640,9 @@ PowerPC\JitCommon + + PowerPC\JitCommon + PowerPC\JitIL @@ -1182,6 +1185,9 @@ PowerPC\JitCommon + + PowerPC\JitCommon + PowerPC\JitIL @@ -1204,4 +1210,4 @@ - \ No newline at end of file + diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h index d0185719f3..5592500c2a 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.h +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.h @@ -56,6 +56,10 @@ public: void Trace(); + JitBlockCache *GetBlockCache() override { return &blocks; } + + bool HandleFault(uintptr_t access_address, SContext* ctx) override { return false; } + void ClearCache() override; const u8 *GetDispatcher() { @@ -105,4 +109,5 @@ public: void DynaRunTable31(UGeckoInstruction _inst) override; void DynaRunTable59(UGeckoInstruction _inst) override; void DynaRunTable63(UGeckoInstruction _inst) override; + }; diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h index 3cd4cf4478..3fa62d80ab 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h @@ -58,6 +58,8 @@ private: void SetFPException(ArmGen::ARMReg Reg, u32 Exception); ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); + + bool BackPatch(SContext* ctx); public: JitArm() : code_buffer(32000) {} ~JitArm() {} @@ -72,9 +74,7 @@ public: JitBaseBlockCache *GetBlockCache() { return &blocks; } - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx); - - bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } + bool HandleFault(uintptr_t access_address, SContext* ctx) override; void Trace(); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp index 6ba24195f5..ee0cf1ee76 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp @@ -66,12 +66,23 @@ bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store) } return true; } -const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void) + +bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx) +{ + 
if (access_address < (uintptr_t)Memory::base) + { + PanicAlertT("Exception handler - access below memory space. %08llx%08llx", + access_address >> 32, access_address); + } + return BackPatch(ctx); +} + +bool JitArm::BackPatch(SContext* ctx) { // TODO: This ctx needs to be filled with our information - SContext *ctx = (SContext *)ctx_void; // We need to get the destination register before we start + u8* codePtr = (u8*)ctx->CTX_PC; u32 Value = *(u32*)codePtr; ARMReg rD; u8 accessSize; @@ -109,7 +120,7 @@ const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void) u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4); ctx->CTX_PC = newPC; emitter.FlushIcache(); - return (u8*)ctx->CTX_PC; + return true; } else { @@ -135,7 +146,7 @@ const u8 *JitArm::BackPatch(u8 *codePtr, u32, void *ctx_void) emitter.MOV(rD, R14); // 8 ctx->CTX_PC -= ARMREGOFFSET + (4 * 4); emitter.FlushIcache(); - return (u8*)ctx->CTX_PC; + return true; } return 0; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index 6e89ae32eb..ea921817b8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -3,24 +3,14 @@ // Refer to the license.txt file included. #include -#include #include "disasm.h" -#include "Common/CommonTypes.h" -#include "Common/StringUtil.h" #include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitBase.h" -#ifdef _WIN32 - #include -#endif - - using namespace Gen; -extern u8 *trampolineCodePtr; - static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) { u64 code_addr = (u64)codePtr; @@ -35,176 +25,51 @@ static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) return; } -void TrampolineCache::Init() +// This generates some fairly heavy trampolines, but it doesn't really hurt. +// Only instructions that access I/O will get these, and there won't be that +// many of them in a typical program/game. +bool Jitx86Base::HandleFault(uintptr_t access_address, SContext* ctx) { - AllocCodeSpace(4 * 1024 * 1024); + // TODO: do we properly handle off-the-end? + if (access_address >= (uintptr_t)Memory::base && access_address < (uintptr_t)Memory::base + 0x100010000) + return BackPatch((u32)(access_address - (uintptr_t)Memory::base), ctx); + + return false; } -void TrampolineCache::Shutdown() +bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) { - FreeCodeSpace(); -} + u8* codePtr = (u8*) ctx->CTX_PC; -// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. -const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse) -{ - if (GetSpaceLeft() < 1024) - PanicAlert("Trampoline cache full"); - - const u8 *trampoline = GetCodePtr(); - X64Reg addrReg = (X64Reg)info.scaledReg; - X64Reg dataReg = (X64Reg)info.regOperandReg; - - // It's a read. Easy. - // RSP alignment here is 8 due to the call. - ABI_PushRegistersAndAdjustStack(registersInUse, 8); - - if (addrReg != ABI_PARAM1) - MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); - - if (info.displacement) - ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); - - switch (info.operandSize) - { - case 4: - CALL((void *)&Memory::Read_U32); - break; - case 2: - CALL((void *)&Memory::Read_U16); - SHL(32, R(ABI_RETURN), Imm8(16)); - break; - case 1: - CALL((void *)&Memory::Read_U8); - break; - } - - if (info.signExtend && info.operandSize == 1) - { - // Need to sign extend value from Read_U8. 
- MOVSX(32, 8, dataReg, R(ABI_RETURN)); - } - else if (dataReg != EAX) - { - MOV(32, R(dataReg), R(ABI_RETURN)); - } - - ABI_PopRegistersAndAdjustStack(registersInUse, 8); - RET(); - return trampoline; -} - -// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. -const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc) -{ - if (GetSpaceLeft() < 1024) - PanicAlert("Trampoline cache full"); - - const u8 *trampoline = GetCodePtr(); - - X64Reg dataReg = (X64Reg)info.regOperandReg; - X64Reg addrReg = (X64Reg)info.scaledReg; - - // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a - // hardware access - we can take shortcuts. - // Don't treat FIFO writes specially for now because they require a burst - // check anyway. - - // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs - MOV(32, PPCSTATE(pc), Imm32(pc)); - - ABI_PushRegistersAndAdjustStack(registersInUse, 8); - - if (info.hasImmediate) - { - if (addrReg != ABI_PARAM2) - MOV(64, R(ABI_PARAM2), R(addrReg)); - // we have to swap back the immediate to pass it to the write functions - switch (info.operandSize) - { - case 8: - PanicAlert("Invalid 64-bit immediate!"); - break; - case 4: - MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate))); - break; - case 2: - MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate))); - break; - case 1: - MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate)); - break; - } - } - else - { - MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg); - } - if (info.displacement) - { - ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); - } - - switch (info.operandSize) - { - case 8: - CALL((void *)&Memory::Write_U64); - break; - case 4: - CALL((void *)&Memory::Write_U32); - break; - case 2: - CALL((void *)&Memory::Write_U16); - break; - case 1: - CALL((void *)&Memory::Write_U8); - break; - } - - ABI_PopRegistersAndAdjustStack(registersInUse, 8); - RET(); - - return trampoline; -} - - -// This generates some fairly heavy trampolines, but: -// 1) It's really necessary. We don't know anything about the context. -// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be -// that many of them in a typical program/game. -const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) -{ - SContext *ctx = (SContext *)ctx_void; - - if (!jit->IsInCodeSpace(codePtr)) - return nullptr; // this will become a regular crash real soon after this + if (!IsInSpace(codePtr)) + return false; // this will become a regular crash real soon after this InstructionInfo info = {}; if (!DisassembleMov(codePtr, &info)) { BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress); - return nullptr; + return false; } if (info.otherReg != RMEM) { PanicAlert("BackPatch : Base reg not RMEM." 
"\n\nAttempted to access %08x.", emAddress); - return nullptr; + return false; } if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE) { PanicAlert("BackPatch: MOVBE is too small"); - return nullptr; + return false; } auto it = registersInUseAtLoc.find(codePtr); if (it == registersInUseAtLoc.end()) { PanicAlert("BackPatch: no register use entry for address %p", codePtr); - return nullptr; + return false; } u32 registersInUse = it->second; @@ -228,7 +93,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) { emitter.NOP(padding); } - return codePtr; + ctx->CTX_PC = (u64)codePtr; } else { @@ -281,6 +146,8 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) { emitter.NOP(padding); } - return start; + ctx->CTX_PC = (u64)start; } + + return true; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h index 3ca7656b21..39e3389501 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h @@ -5,11 +5,6 @@ #pragma once #include "Common/CommonTypes.h" -#include "Common/x64Analyzer.h" -#include "Common/x64Emitter.h" - -// We need at least this many bytes for backpatching. -const int BACKPATCH_SIZE = 5; // meh. #if defined(_WIN32) @@ -147,8 +142,8 @@ const int BACKPATCH_SIZE = 5; #endif #if _M_X86_64 -#define CTX_PC CTX_RIP #include +#define CTX_PC CTX_RIP static inline u64 *ContextRN(SContext* ctx, int n) { static const u8 offsets[] = @@ -173,13 +168,3 @@ static inline u64 *ContextRN(SContext* ctx, int n) return (u64 *) ((char *) ctx + offsets[n]); } #endif - -class TrampolineCache : public Gen::X64CodeBlock -{ -public: - void Init(); - void Shutdown(); - - const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); - const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc); -}; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index c6ff6e4967..52463ec619 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -26,6 +26,7 @@ #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitCache.h" +#include "Core/PowerPC/JitCommon/TrampolineCache.h" // TODO: find a better place for x86-specific stuff // The following register assignments are common to Jit64 and Jit64IL: @@ -110,24 +111,20 @@ public: virtual void Jit(u32 em_address) = 0; - virtual const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) = 0; - virtual const CommonAsmRoutinesBase *GetAsmRoutines() = 0; - virtual bool IsInCodeSpace(u8 *ptr) = 0; + virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0; }; class Jitx86Base : public JitBase, public EmuCodeBlock { protected: + bool BackPatch(u32 emAddress, SContext* ctx); JitBlockCache blocks; TrampolineCache trampolines; public: JitBlockCache *GetBlockCache() override { return &blocks; } - - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) override; - - bool IsInCodeSpace(u8 *ptr) override { return IsInSpace(ptr); } + bool HandleFault(uintptr_t access_address, SContext* ctx) override; }; extern JitBase *jit; diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp new file mode 100644 index 0000000000..5e961bc6e5 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp @@ 
-0,0 +1,156 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include +#include + +#include "Common/CommonTypes.h" +#include "Common/StringUtil.h" +#include "Common/x64ABI.h" +#include "Core/HW/Memmap.h" +#include "Core/PowerPC/JitCommon/JitBase.h" +#include "Core/PowerPC/JitCommon/TrampolineCache.h" + +#ifdef _WIN32 + #include +#endif + + +using namespace Gen; + +extern u8 *trampolineCodePtr; + +void TrampolineCache::Init() +{ + AllocCodeSpace(4 * 1024 * 1024); +} + +void TrampolineCache::Shutdown() +{ + FreeCodeSpace(); +} + +// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. +const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 registersInUse) +{ + if (GetSpaceLeft() < 1024) + PanicAlert("Trampoline cache full"); + + const u8 *trampoline = GetCodePtr(); + X64Reg addrReg = (X64Reg)info.scaledReg; + X64Reg dataReg = (X64Reg)info.regOperandReg; + + // It's a read. Easy. + // RSP alignment here is 8 due to the call. + ABI_PushRegistersAndAdjustStack(registersInUse, 8); + + if (addrReg != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); + + if (info.displacement) + ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); + + switch (info.operandSize) + { + case 4: + CALL((void *)&Memory::Read_U32); + break; + case 2: + CALL((void *)&Memory::Read_U16); + SHL(32, R(ABI_RETURN), Imm8(16)); + break; + case 1: + CALL((void *)&Memory::Read_U8); + break; + } + + if (info.signExtend && info.operandSize == 1) + { + // Need to sign extend value from Read_U8. + MOVSX(32, 8, dataReg, R(ABI_RETURN)); + } + else if (dataReg != EAX) + { + MOV(32, R(dataReg), R(ABI_RETURN)); + } + + ABI_PopRegistersAndAdjustStack(registersInUse, 8); + RET(); + return trampoline; +} + +// Extremely simplistic - just generate the requested trampoline. May reuse them in the future. +const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc) +{ + if (GetSpaceLeft() < 1024) + PanicAlert("Trampoline cache full"); + + const u8 *trampoline = GetCodePtr(); + + X64Reg dataReg = (X64Reg)info.regOperandReg; + X64Reg addrReg = (X64Reg)info.scaledReg; + + // It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a + // hardware access - we can take shortcuts. + // Don't treat FIFO writes specially for now because they require a burst + // check anyway. 
+ + // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs + MOV(32, PPCSTATE(pc), Imm32(pc)); + + ABI_PushRegistersAndAdjustStack(registersInUse, 8); + + if (info.hasImmediate) + { + if (addrReg != ABI_PARAM2) + MOV(64, R(ABI_PARAM2), R(addrReg)); + // we have to swap back the immediate to pass it to the write functions + switch (info.operandSize) + { + case 8: + PanicAlert("Invalid 64-bit immediate!"); + break; + case 4: + MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate))); + break; + case 2: + MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate))); + break; + case 1: + MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate)); + break; + } + } + else + { + MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg); + } + if (info.displacement) + { + ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); + } + + switch (info.operandSize) + { + case 8: + CALL((void *)&Memory::Write_U64); + break; + case 4: + CALL((void *)&Memory::Write_U32); + break; + case 2: + CALL((void *)&Memory::Write_U16); + break; + case 1: + CALL((void *)&Memory::Write_U8); + break; + } + + ABI_PopRegistersAndAdjustStack(registersInUse, 8); + RET(); + + return trampoline; +} + + diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h new file mode 100644 index 0000000000..516a071ac2 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.h @@ -0,0 +1,22 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "Common/CommonTypes.h" +#include "Common/x64Analyzer.h" +#include "Common/x64Emitter.h" + +// We need at least this many bytes for backpatching. +const int BACKPATCH_SIZE = 5; + +class TrampolineCache : public Gen::X64CodeBlock +{ +public: + void Init(); + void Shutdown(); + + const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse); + const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse, u32 pc); +}; diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index ea9b12be70..6bfd1c4009 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -190,13 +190,9 @@ namespace JitInterface } #endif } - bool IsInCodeSpace(u8 *ptr) + bool HandleFault(uintptr_t access_address, SContext* ctx) { - return jit->IsInCodeSpace(ptr); - } - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) - { - return jit->BackPatch(codePtr, em_address, ctx); + return jit->HandleFault(access_address, ctx); } void ClearCache() diff --git a/Source/Core/Core/PowerPC/JitInterface.h b/Source/Core/Core/PowerPC/JitInterface.h index 3cb57422bb..a8ed783726 100644 --- a/Source/Core/Core/PowerPC/JitInterface.h +++ b/Source/Core/Core/PowerPC/JitInterface.h @@ -7,6 +7,7 @@ #include #include "Common/ChunkFile.h" #include "Core/PowerPC/CPUCoreBase.h" +#include "Core/PowerPC/JitCommon/JitBackpatch.h" namespace JitInterface { @@ -20,8 +21,7 @@ namespace JitInterface void WriteProfileResults(const std::string& filename); // Memory Utilities - bool IsInCodeSpace(u8 *ptr); - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx); + bool HandleFault(uintptr_t access_address, SContext* ctx); // used by JIT to read instructions u32 Read_Opcode_JIT(const u32 _Address); diff --git a/Source/Core/Core/x64MemTools.cpp b/Source/Core/Core/x64MemTools.cpp index 057d3a83f0..518c3bb160 100644 --- 
a/Source/Core/Core/x64MemTools.cpp +++ b/Source/Core/Core/x64MemTools.cpp @@ -23,42 +23,6 @@ namespace EMM { -static bool DoFault(u64 bad_address, SContext *ctx) -{ - if (!JitInterface::IsInCodeSpace((u8*) ctx->CTX_PC)) - { - // Let's not prevent debugging. - return false; - } - - u64 memspace_bottom = (u64)Memory::base; - u64 memspace_top = memspace_bottom + -#if _ARCH_64 - 0x100000000ULL; -#else - 0x40000000; -#endif - - if (bad_address < memspace_bottom || bad_address >= memspace_top) - { - return false; - } - - u32 em_address = (u32)(bad_address - memspace_bottom); - const u8 *new_pc = jit->BackPatch((u8*) ctx->CTX_PC, em_address, ctx); - if (new_pc) - { - ctx->CTX_PC = (u64) new_pc; - } - else - { - // there was an error, give the debugger a chance - return false; - } - - return true; -} - #ifdef _WIN32 LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) @@ -74,10 +38,10 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs) } // virtual address of the inaccessible data - u64 badAddress = (u64)pPtrs->ExceptionRecord->ExceptionInformation[1]; + uintptr_t badAddress = (uintptr_t)pPtrs->ExceptionRecord->ExceptionInformation[1]; CONTEXT *ctx = pPtrs->ContextRecord; - if (DoFault(badAddress, ctx)) + if (JitInterface::HandleFault(badAddress, ctx)) { return (DWORD)EXCEPTION_CONTINUE_EXECUTION; } @@ -198,7 +162,7 @@ void ExceptionThread(mach_port_t port) x86_thread_state64_t *state = (x86_thread_state64_t *) msg_in.old_state; - bool ok = DoFault(msg_in.code[1], state); + bool ok = JitInterface::HandleFault((uintptr_t) msg_in.code[1], state); // Set up the reply. msg_out.Head.msgh_bits = MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(msg_in.Head.msgh_bits), 0); @@ -263,12 +227,12 @@ static void sigsegv_handler(int sig, siginfo_t *info, void *raw_context) // Huh? Return. return; } - u64 bad_address = (u64)info->si_addr; + uintptr_t bad_address = (uintptr_t)info->si_addr; // Get all the information we can out of the context. mcontext_t *ctx = &context->uc_mcontext; // assume it's not a write - if (!DoFault(bad_address, ctx)) + if (!JitInterface::HandleFault(bad_address, ctx)) { // retry and crash signal(SIGSEGV, SIG_DFL); From 7ad90275934b63dcc4726a7c49ee9a64758dda3b Mon Sep 17 00:00:00 2001 From: comex Date: Mon, 15 Sep 2014 23:03:07 -0400 Subject: [PATCH 6/6] Be pedantic about stack overflow on Linux and OS X. Add some magic to the fault handler to handle stack overflow due to BLR optimization, and disable the optimization if fastmem is not enabled. 
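A note on the mechanism, since the message is terse: the patch parks the
JIT stack behind a PROT_NONE guard region, so runaway BL recursion turns
into a SIGSEGV whose fault address identifies the guard. On Linux the
alternate signal stack from patch 4 lets the handler run even though the
thread's stack is the very thing that overflowed; on OS X the Mach
exception handler already runs on a separate thread. Below is a minimal,
self-contained POSIX sketch of that idea; it is illustrative only, not
Dolphin code (all names are local to the example, and the mmap flags are
Linux-flavored):

    #include <signal.h>
    #include <stdint.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static char* guard_page; // stands in for the middle guard of the JIT stack

    static void segv_handler(int, siginfo_t* info, void*)
    {
        uintptr_t addr = (uintptr_t)info->si_addr;
        if (addr >= (uintptr_t)guard_page && addr < (uintptr_t)guard_page + 4096)
        {
            // Overflow reached the guard: this is the point where Dolphin
            // would disable the BLR optimization and schedule a cache clear.
            _exit(0);
        }
        signal(SIGSEGV, SIG_DFL); // unrelated fault: crash normally on retry
    }

    int main()
    {
        guard_page = (char*)mmap(nullptr, 4096, PROT_NONE,
                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (guard_page == MAP_FAILED)
            return 2;

        // The handler must run on its own stack; the faulting thread's
        // stack may be exactly what just overflowed.
        static char alt_stack[64 * 1024];
        stack_t ss;
        ss.ss_sp = alt_stack;
        ss.ss_size = sizeof(alt_stack);
        ss.ss_flags = 0;
        sigaltstack(&ss, nullptr);

        struct sigaction sa;
        sa.sa_sigaction = segv_handler;
        sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGSEGV, &sa, nullptr);

        guard_page[0] = 1; // simulate the overflowing access; handler exits 0
        return 1;          // not reached
    }

In the patch below, the equivalent classification happens in
Jit64::HandleFault, which checks whether access_address lands in the
middle guard region of m_stack before deferring to the fastmem handler.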
--- Source/Core/Common/MemoryUtil.cpp | 19 ++++ Source/Core/Common/MemoryUtil.h | 4 + Source/Core/Common/x64Emitter.cpp | 2 + Source/Core/Common/x64Emitter.h | 2 + Source/Core/Core/PowerPC/Jit64/Jit.cpp | 114 ++++++++++++++++++--- Source/Core/Core/PowerPC/Jit64/Jit.h | 13 +++ Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 37 +++++-- Source/Core/Core/PowerPC/Jit64/JitAsm.h | 4 +- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 2 +- 9 files changed, 174 insertions(+), 23 deletions(-) diff --git a/Source/Core/Common/MemoryUtil.cpp b/Source/Core/Common/MemoryUtil.cpp index f7e1d7d902..a741deef4f 100644 --- a/Source/Core/Common/MemoryUtil.cpp +++ b/Source/Core/Common/MemoryUtil.cpp @@ -158,6 +158,25 @@ void FreeAlignedMemory(void* ptr) } } +void ReadProtectMemory(void* ptr, size_t size) +{ + bool error_occurred = false; + +#ifdef _WIN32 + DWORD oldValue; + if (!VirtualProtect(ptr, size, PAGE_NOACCESS, &oldValue)) + error_occurred = true; +#else + int retval = mprotect(ptr, size, PROT_NONE); + + if (retval != 0) + error_occurred = true; +#endif + + if (error_occurred) + PanicAlert("ReadProtectMemory failed!\n%s", GetLastErrorMsg()); +} + void WriteProtectMemory(void* ptr, size_t size, bool allowExecute) { bool error_occurred = false; diff --git a/Source/Core/Common/MemoryUtil.h b/Source/Core/Common/MemoryUtil.h index 6f437fcda7..5f584f868d 100644 --- a/Source/Core/Common/MemoryUtil.h +++ b/Source/Core/Common/MemoryUtil.h @@ -12,8 +12,12 @@ void* AllocateMemoryPages(size_t size); void FreeMemoryPages(void* ptr, size_t size); void* AllocateAlignedMemory(size_t size,size_t alignment); void FreeAlignedMemory(void* ptr); +void ReadProtectMemory(void* ptr, size_t size); void WriteProtectMemory(void* ptr, size_t size, bool executable = false); void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false); std::string MemUsage(); +void GuardMemoryMake(void* ptr, size_t size); +void GuardMemoryUnmake(void* ptr, size_t size); + inline int GetPageSize() { return 4096; } diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index fa16cf2b36..75cd418379 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -1766,6 +1766,8 @@ void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI void XEmitter::LOCK() { Write8(0xF0); } void XEmitter::REP() { Write8(0xF3); } void XEmitter::REPNE() { Write8(0xF2); } +void XEmitter::FSOverride() { Write8(0x64); } +void XEmitter::GSOverride() { Write8(0x65); } void XEmitter::FWAIT() { diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 8f41065668..8b655c2c42 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -467,6 +467,8 @@ public: void LOCK(); void REP(); void REPNE(); + void FSOverride(); + void GSOverride(); // x87 enum x87StatusWordBits { diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index d928d02927..92595f6acd 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -95,6 +95,83 @@ using namespace PowerPC; and such, but it's currently limited to integer ops only. This can definitely be made better. */ +// The BLR optimization is nice, but it means that JITted code can overflow the +// native stack by repeatedly running BL. (The chance of this happening in any +// retail game is close to 0, but correctness is correctness...) 
Also, the +// overflow might not happen directly in the JITted code but in a C++ function +// called from it, so we can't just adjust RSP in the case of a fault. +// Instead, we have to have extra stack space preallocated under the fault +// point which allows the code to continue, after wiping the JIT cache so we +// can reset things at a safe point. Once this condition trips, the +// optimization is permanently disabled, under the assumption this will never +// happen in practice. + +// On Unix, we just mark an appropriate region of the stack as PROT_NONE and +// handle it the same way as fastmem faults. It's safe to take a fault with a +// bad RSP, because on Linux we can use sigaltstack and on OS X we're already +// on a separate thread. + +// On Windows, the OS gets upset if RSP doesn't work, and I don't know any +// equivalent of sigaltstack. Windows supports guard pages which, when +// accessed, immediately turn into regular pages but cause a trap... but +// putting them in the path of RSP just leads to something (in the kernel?) +// thinking a regular stack extension is required. So this protection is not +// supported on Windows yet... We still use a separate stack for the sake of +// simplicity. + +enum +{ + STACK_SIZE = 2 * 1024 * 1024, + SAFE_STACK_SIZE = 512 * 1024, + GUARD_SIZE = 0x10000, // two guards - bottom (permanent) and middle (see above) + GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE, +}; + +void Jit64::AllocStack() +{ +#if defined(_WIN32) + m_stack = (u8*)AllocateMemoryPages(STACK_SIZE); + ReadProtectMemory(m_stack, GUARD_SIZE); + ReadProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); +#endif +} + +void Jit64::FreeStack() +{ +#if defined(_WIN32) + if (m_stack) + { + FreeMemoryPages(m_stack, STACK_SIZE); + m_stack = NULL; + } +#endif +} + +bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx) +{ + uintptr_t stack = (uintptr_t)m_stack, diff = access_address - stack; + // In the trap region? + if (stack && diff >= GUARD_OFFSET && diff < GUARD_OFFSET + GUARD_SIZE) + { + WARN_LOG(POWERPC, "BLR cache disabled due to excessive BL in the emulated program."); + m_enable_blr_optimization = false; + UnWriteProtectMemory(m_stack + GUARD_OFFSET, GUARD_SIZE); + // We're going to need to clear the whole cache to get rid of the bad + // CALLs, but we can't yet. Fake the downcount so we're forced to the + // dispatcher (no block linking), and clear the cache so we're sent to + // Jit. Yeah, it's kind of gross. + GetBlockCache()->InvalidateICache(0, 0xffffffff); + CoreTiming::ForceExceptionCheck(0); + m_clear_cache_asap = true; + + return true; + } + + return Jitx86Base::HandleFault(access_address, ctx); +} + + + void Jit64::Init() { jo.optimizeStack = true; @@ -130,8 +207,18 @@ void Jit64::Init() trampolines.Init(); AllocCodeSpace(CODE_SIZE); + + // BLR optimization has the same consequences as block linking, as well as + // depending on the fault handler to be safe in the event of excessive BL. + m_enable_blr_optimization = jo.enableBlocklink && SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem; + m_clear_cache_asap = false; + + m_stack = nullptr; + if (m_enable_blr_optimization) + AllocStack(); + blocks.Init(); - asm_routines.Init(); + asm_routines.Init(m_stack ? (m_stack + STACK_SIZE) : nullptr); // important: do this *after* generating the global asm routines, because we can't use farcode in them. // it'll crash because the farcode functions get cleared on JIT clears. 
@@ -155,6 +242,7 @@ void Jit64::ClearCache() void Jit64::Shutdown() { + FreeStack(); FreeCodeSpace(); blocks.Shutdown(); @@ -251,11 +339,8 @@ bool Jit64::Cleanup() void Jit64::WriteExit(u32 destination, bool bl, u32 after) { - // BLR optimization has similar consequences to block linking. - if (!jo.enableBlocklink) - { + if (!m_enable_blr_optimization) bl = false; - } Cleanup(); @@ -313,17 +398,17 @@ void Jit64::JustWriteExit(u32 destination, bool bl, u32 after) void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after) { - if (!jo.enableBlocklink) - { + if (!m_enable_blr_optimization) bl = false; - } + MOV(32, PPCSTATE(pc), R(RSCRATCH)); + Cleanup(); + if (bl) { MOV(32, R(RSCRATCH2), Imm32(after)); PUSH(RSCRATCH2); } - MOV(32, PPCSTATE(pc), R(RSCRATCH)); - Cleanup(); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); if (bl) { @@ -339,7 +424,7 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after) void Jit64::WriteBLRExit() { - if (!jo.enableBlocklink) + if (!m_enable_blr_optimization) { WriteExitDestInRSCRATCH(); return; @@ -428,8 +513,11 @@ void Jit64::Trace() void STACKALIGN Jit64::Jit(u32 em_address) { - if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() || - SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache) + if (GetSpaceLeft() < 0x10000 || + farcode.GetSpaceLeft() < 0x10000 || + blocks.IsFull() || + SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache || + m_clear_cache_asap) { ClearCache(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index cface00cb3..0391d258cc 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -18,6 +18,10 @@ // ---------- #pragma once +#ifdef _WIN32 +#include +#endif + #include "Common/x64ABI.h" #include "Common/x64Analyzer.h" #include "Common/x64Emitter.h" @@ -40,6 +44,9 @@ class Jit64 : public Jitx86Base { private: + void AllocStack(); + void FreeStack(); + GPRRegCache gpr; FPURegCache fpr; @@ -48,6 +55,10 @@ private: PPCAnalyst::CodeBuffer code_buffer; Jit64AsmRoutineManager asm_routines; + bool m_enable_blr_optimization; + bool m_clear_cache_asap; + u8* m_stack; + public: Jit64() : code_buffer(32000) {} ~Jit64() {} @@ -55,6 +66,8 @@ public: void Init() override; void Shutdown() override; + bool HandleFault(uintptr_t access_address, SContext* ctx) override; + // Jit! void Jit(u32 em_address) override; diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index dc307540f6..dcfffaa3e9 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -23,8 +23,18 @@ void Jit64AsmRoutineManager::Generate() // for the shadow region before calls in this function. This call will // waste a bit of space for a second shadow, but whatever. ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, /*frame*/ 16); + if (m_stack_top) + { + // Pivot the stack to our custom one. + MOV(64, R(RSCRATCH), R(RSP)); + MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20)); + MOV(64, MDisp(RSP, 0x18), R(RSCRATCH)); + } + else + { + MOV(64, M(&s_saved_rsp), R(RSP)); + } // something that can't pass the BLR test - MOV(64, M(&s_saved_rsp), R(RSP)); MOV(64, MDisp(RSP, 8), Imm32((u32)-1)); // Two statically allocated registers. 
@@ -46,7 +56,10 @@ void Jit64AsmRoutineManager::Generate() ABI_PopRegistersAndAdjustStack(1 << RSCRATCH, 0); #endif - MOV(64, R(RSP), M(&s_saved_rsp)); + if (m_stack_top) + MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x20)); + else + MOV(64, R(RSP), M(&s_saved_rsp)); SUB(32, PPCSTATE(downcount), R(RSCRATCH)); @@ -55,6 +68,8 @@ void Jit64AsmRoutineManager::Generate() // IMPORTANT - We jump on negative, not carry!!! FixupBranch bail = J_CC(CC_BE, true); + FixupBranch dbg_exit; + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) { TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING)); @@ -63,11 +78,7 @@ void Jit64AsmRoutineManager::Generate() ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); ABI_PopRegistersAndAdjustStack(0, 0); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); - FixupBranch noBreakpoint = J_CC(CC_Z); - MOV(64, R(RSP), M(&s_saved_rsp)); - ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16); - RET(); - SetJumpTarget(noBreakpoint); + dbg_exit = J_CC(CC_NZ); SetJumpTarget(notStepping); } @@ -155,7 +166,17 @@ void Jit64AsmRoutineManager::Generate() J_CC(CC_Z, outerLoop); //Landing pad for drec space - MOV(64, R(RSP), M(&s_saved_rsp)); + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) + SetJumpTarget(dbg_exit); + if (m_stack_top) + { + MOV(64, R(RSP), Imm64((u64)m_stack_top - 0x8)); + POP(RSP); + } + else + { + MOV(64, R(RSP), M(&s_saved_rsp)); + } ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16); RET(); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h index e3cc4371f7..9272f5c8aa 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h @@ -25,10 +25,12 @@ class Jit64AsmRoutineManager : public CommonAsmRoutines private: void Generate(); void GenerateCommon(); + u8* m_stack_top; public: - void Init() + void Init(u8* stack_top) { + m_stack_top = stack_top; AllocCodeSpace(8192); Generate(); WriteProtect(); diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 81260249c7..9f9f9cf98c 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -272,7 +272,7 @@ void JitIL::Init() trampolines.Init(); AllocCodeSpace(CODE_SIZE); blocks.Init(); - asm_routines.Init(); + asm_routines.Init(nullptr); farcode.Init(js.memcheck ? FARCODE_SIZE_MMU : FARCODE_SIZE);
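End-of-series note: the BLR optimization is spread across patches 2 and
3 above, so here are its two halves collected in one place as a reading
aid. This is reassembled from the diffs, not new code; the emitter calls
are Dolphin's, RSCRATCH/RSCRATCH2 are Jit64's scratch registers, and
'after' is the PPC address of the instruction following the BL.

    // BL site (WriteExit/WriteExitDestInRSCRATCH with bl == true): push the
    // predicted PPC return address, then CALL so the CPU's return-address
    // predictor pairs the call with the RET in WriteBLRExit below.
    MOV(32, R(RSCRATCH2), Imm32(after));
    PUSH(RSCRATCH2);
    CALL(asm_routines.dispatcher);  // or the linked block's checkedEntry
    // The matching RET lands here; discard the prediction and exit normally.
    POP(RSCRATCH);
    JustWriteExit(after, false, 0);

    // BLR site (WriteBLRExit, as simplified by patch 3): the CALL above left
    // its native return address at [RSP], so the pushed PPC address sits at
    // [RSP+8]. If it matches the emulated LR (already in RSCRATCH), a plain
    // RET both returns and predicts perfectly; otherwise bail out to
    // dispatcherMispredictedBLR, which restores RSP and charges
    // js.downcountAmount (passed in RSCRATCH). The MOV below writes no
    // flags, so it can sit between the CMP and the J_CC.
    MOV(32, PPCSTATE(pc), R(RSCRATCH));
    CMP(64, R(RSCRATCH), MDisp(RSP, 8));
    MOV(32, R(RSCRATCH), Imm32(js.downcountAmount));
    J_CC(CC_NE, asm_routines.dispatcherMispredictedBLR);
    SUB(32, PPCSTATE(downcount), R(RSCRATCH));
    RET();

One design detail worth noticing: enterCode seeds the slot with
MOV(64, MDisp(RSP, 8), Imm32((u32)-1)), which sign-extends to a value no
zero-extended 32-bit PC can equal, so the first BLR after entry always
takes the mispredict path rather than RETing into the wrong frame.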