diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 6c9a616913..c475121d2a 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -10,23 +10,18 @@
 #include <map>
 #include <string>
 
+// for the PROFILER stuff
 #ifdef _WIN32
 #include <windows.h>
-#include <processthreadsapi.h>
-#else
-#include <unistd.h>
 #endif
 
-#include "Common/Align.h"
 #include "Common/CommonTypes.h"
 #include "Common/GekkoDisassembler.h"
 #include "Common/IOFile.h"
 #include "Common/Logging/Log.h"
-#include "Common/MemoryUtil.h"
 #include "Common/PerformanceCounter.h"
 #include "Common/StringUtil.h"
 #include "Common/Swap.h"
-#include "Common/Thread.h"
 #include "Common/x64ABI.h"
 #include "Core/Core.h"
 #include "Core/CoreTiming.h"
@@ -121,130 +116,12 @@ using namespace PowerPC;
   and such, but it's currently limited to integer ops only. This can definitely be made better.
 */
 
-// The BLR optimization is nice, but it means that JITted code can overflow the
-// native stack by repeatedly running BL. (The chance of this happening in any
-// retail game is close to 0, but correctness is correctness...) Also, the
-// overflow might not happen directly in the JITted code but in a C++ function
-// called from it, so we can't just adjust RSP in the case of a fault.
-// Instead, we have to have extra stack space preallocated under the fault
-// point which allows the code to continue, after wiping the JIT cache so we
-// can reset things at a safe point. Once this condition trips, the
-// optimization is permanently disabled, under the assumption this will never
-// happen in practice.
-
-// On Unix, we just mark an appropriate region of the stack as PROT_NONE and
-// handle it the same way as fastmem faults. It's safe to take a fault with a
-// bad RSP, because on Linux we can use sigaltstack and on OS X we're already
-// on a separate thread.
-
-// Windows is... under-documented.
-// It already puts guard pages so it can automatically grow the stack and it
-// doesn't look like there is a way to hook into a guard page fault and implement
-// our own logic.
-// But when windows reaches the last guard page, it raises a "Stack Overflow"
-// exception which we can hook into, however by default it leaves you with less
-// than 4kb of stack. So we use SetThreadStackGuarantee to trigger the Stack
-// Overflow early while we still have 256kb of stack remaining.
-// After resetting the stack to the top, we call _resetstkoflw() to restore
-// the guard page at the 256kb mark.
-
-enum
-{
-  SAFE_STACK_SIZE = 256 * 1024,
-  MIN_UNSAFE_STACK_SIZE = 192 * 1024,
-  MIN_STACK_SIZE = SAFE_STACK_SIZE + MIN_UNSAFE_STACK_SIZE,
-  GUARD_SIZE = 64 * 1024,
-  GUARD_OFFSET = SAFE_STACK_SIZE - GUARD_SIZE,
-};
-
 Jit64::Jit64() : QuantizedMemoryRoutines(*this)
 {
 }
 
 Jit64::~Jit64() = default;
 
-void Jit64::ProtectStack()
-{
-  if (!m_enable_blr_optimization)
-    return;
-
-#ifdef _WIN32
-  ULONG reserveSize = SAFE_STACK_SIZE;
-  SetThreadStackGuarantee(&reserveSize);
-#else
-  auto [stack_addr, stack_size] = Common::GetCurrentThreadStack();
-
-  const uintptr_t stack_base_addr = reinterpret_cast<uintptr_t>(stack_addr);
-  const uintptr_t stack_middle_addr = reinterpret_cast<uintptr_t>(&stack_addr);
-  if (stack_middle_addr < stack_base_addr || stack_middle_addr >= stack_base_addr + stack_size)
-  {
-    PanicAlertFmt("Failed to get correct stack base");
-    m_enable_blr_optimization = false;
-    return;
-  }
-
-  const long page_size = sysconf(_SC_PAGESIZE);
-  if (page_size <= 0)
-  {
-    PanicAlertFmt("Failed to get page size");
-    m_enable_blr_optimization = false;
-    return;
-  }
-
-  const uintptr_t stack_guard_addr = Common::AlignUp(stack_base_addr + GUARD_OFFSET, page_size);
-  if (stack_guard_addr >= stack_middle_addr ||
-      stack_middle_addr - stack_guard_addr < GUARD_SIZE + MIN_UNSAFE_STACK_SIZE)
-  {
-    PanicAlertFmt("Stack is too small for BLR optimization (size {:x}, base {:x}, current stack "
-                  "pointer {:x}, alignment {:x})",
-                  stack_size, stack_base_addr, stack_middle_addr, page_size);
-    m_enable_blr_optimization = false;
-    return;
-  }
-
-  m_stack_guard = reinterpret_cast<u8*>(stack_guard_addr);
-  Common::ReadProtectMemory(m_stack_guard, GUARD_SIZE);
-#endif
-}
-
-void Jit64::UnprotectStack()
-{
-#ifndef _WIN32
-  if (m_stack_guard)
-  {
-    Common::UnWriteProtectMemory(m_stack_guard, GUARD_SIZE);
-    m_stack_guard = nullptr;
-  }
-#endif
-}
-
-bool Jit64::HandleStackFault()
-{
-  // It's possible the stack fault might have been caused by something other than
-  // the BLR optimization. If the fault was triggered from another thread, or
-  // when BLR optimization isn't enabled then there is nothing we can do about the fault.
-  // Return false so the regular stack overflow handler can trigger (which crashes)
-  if (!m_enable_blr_optimization || !Core::IsCPUThread())
-    return false;
-
-  WARN_LOG_FMT(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
-
-  UnprotectStack();
-  m_enable_blr_optimization = false;
-
-  // We're going to need to clear the whole cache to get rid of the bad
-  // CALLs, but we can't yet. Fake the downcount so we're forced to the
-  // dispatcher (no block linking), and clear the cache so we're sent to
-  // Jit. In the case of Windows, we will also need to call _resetstkoflw()
-  // to reset the guard page.
-  // Yeah, it's kind of gross.
-  GetBlockCache()->InvalidateICache(0, 0xffffffff, true);
-  Core::System::GetInstance().GetCoreTiming().ForceExceptionCheck(0);
-  m_cleanup_after_stackfault = true;
-
-  return true;
-}
-
 bool Jit64::HandleFault(uintptr_t access_address, SContext* ctx)
 {
   const uintptr_t stack_guard = reinterpret_cast<uintptr_t>(m_stack_guard);
@@ -400,11 +277,6 @@ void Jit64::Init()
   m_const_pool.Init(AllocChildCodeSpace(constpool_size), constpool_size);
   ResetCodePtr();
 
-  // BLR optimization has the same consequences as block linking, as well as
-  // depending on the fault handler to be safe in the event of excessive BL.
-  m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging;
-  m_cleanup_after_stackfault = false;
-  m_stack_guard = nullptr;
 
   blocks.Init();
@@ -819,15 +691,7 @@ void Jit64::Jit(u32 em_address)
 
 void Jit64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
 {
-  if (m_cleanup_after_stackfault)
-  {
-    ClearCache();
-    m_cleanup_after_stackfault = false;
-#ifdef _WIN32
-    // The stack is in an invalid state with no guard page, reset it.
-    _resetstkoflw();
-#endif
-  }
+  CleanUpAfterStackFault();
 
   if (trampolines.IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache)
   {
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 4d96ea518d..c090af4dea 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -50,7 +50,6 @@ public:
   void Shutdown() override;
 
   bool HandleFault(uintptr_t access_address, SContext* ctx) override;
-  bool HandleStackFault() override;
   bool BackPatch(SContext* ctx);
 
   void EnableOptimization();
@@ -255,9 +254,6 @@ private:
 
   bool HandleFunctionHooking(u32 address);
 
-  void ProtectStack();
-  void UnprotectStack();
-
   void ResetFreeMemoryRanges();
 
   JitBlockCache blocks{*this};
@@ -268,10 +264,6 @@ private:
 
   Jit64AsmRoutineManager asm_routines{*this};
 
-  bool m_enable_blr_optimization = false;
-  bool m_cleanup_after_stackfault = false;
-  u8* m_stack_guard = nullptr;
-
   HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near;
   HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far;
 };
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 78b837bfaf..08d137631c 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -5,13 +5,6 @@
 
 #include <cstdio>
 
-#ifdef _WIN32
-#include <windows.h>
-#else
-#include <unistd.h>
-#endif
-
-#include "Common/Align.h"
 #include "Common/Arm64Emitter.h"
 #include "Common/CommonTypes.h"
 #include "Common/Logging/Log.h"
@@ -19,7 +12,6 @@
 #include "Common/MsgHandler.h"
 #include "Common/PerformanceCounter.h"
 #include "Common/StringUtil.h"
-#include "Common/Thread.h"
 #include "Core/ConfigManager.h"
 #include "Core/Core.h"
@@ -46,12 +38,6 @@
 constexpr size_t CODE_SIZE = 1024 * 1024 * 32;
 constexpr size_t FARCODE_SIZE = 1024 * 1024 * 64;
 constexpr size_t FARCODE_SIZE_MMU = 1024 * 1024 * 64;
 
-constexpr size_t SAFE_STACK_SIZE = 256 * 1024;
-constexpr size_t MIN_UNSAFE_STACK_SIZE = 192 * 1024;
-constexpr size_t MIN_STACK_SIZE = SAFE_STACK_SIZE + MIN_UNSAFE_STACK_SIZE;
-constexpr size_t GUARD_SIZE = 64 * 1024;
-constexpr size_t GUARD_OFFSET = SAFE_STACK_SIZE - GUARD_SIZE;
-
 JitArm64::JitArm64() : m_float_emit(this)
 {
 }
@@ -80,9 +66,6 @@ void JitArm64::Init()
   code_block.m_gpa = &js.gpa;
   code_block.m_fpa = &js.fpa;
 
-  m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging;
-  m_cleanup_after_stackfault = false;
-
   GenerateAsm();
 
   ResetFreeMemoryRanges();
@@ -163,23 +146,6 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
   return success;
 }
 
-bool JitArm64::HandleStackFault()
-{
-  if (!m_enable_blr_optimization)
-    return false;
-
-  ERROR_LOG_FMT(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
-
-  UnprotectStack();
-  m_enable_blr_optimization = false;
-
-  GetBlockCache()->InvalidateICache(0, 0xffffffff, true);
-  Core::System::GetInstance().GetCoreTiming().ForceExceptionCheck(0);
-  m_cleanup_after_stackfault = true;
-
-  return true;
-}
-
 void JitArm64::ClearCache()
 {
   m_fault_to_handler.clear();
@@ -343,59 +309,6 @@ void JitArm64::ResetStack()
   ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
 }
 
-void JitArm64::ProtectStack()
-{
-  if (!m_enable_blr_optimization)
-    return;
-
-#ifdef _WIN32
-  ULONG reserveSize = SAFE_STACK_SIZE;
-  SetThreadStackGuarantee(&reserveSize);
-#else
-  auto [stack_addr, stack_size] = Common::GetCurrentThreadStack();
-
-  const uintptr_t stack_base_addr = reinterpret_cast<uintptr_t>(stack_addr);
-  const uintptr_t stack_middle_addr = reinterpret_cast<uintptr_t>(&stack_addr);
-  if (stack_middle_addr < stack_base_addr || stack_middle_addr >= stack_base_addr + stack_size)
-  {
-    PanicAlertFmt("Failed to get correct stack base");
-    m_enable_blr_optimization = false;
-    return;
-  }
-
-  const long page_size = sysconf(_SC_PAGESIZE);
-  if (page_size <= 0)
-  {
-    PanicAlertFmt("Failed to get page size");
-    m_enable_blr_optimization = false;
-    return;
-  }
-
-  const uintptr_t stack_guard_addr = Common::AlignUp(stack_base_addr + GUARD_OFFSET, page_size);
-  if (stack_guard_addr >= stack_middle_addr ||
-      stack_middle_addr - stack_guard_addr < GUARD_SIZE + MIN_UNSAFE_STACK_SIZE)
-  {
-    PanicAlertFmt("Stack is too small for BLR optimization (size {:x}, base {:x}, current stack "
-                  "pointer {:x}, alignment {:x})",
-                  stack_size, stack_base_addr, stack_middle_addr, page_size);
-    m_enable_blr_optimization = false;
-    return;
-  }
-
-  m_stack_guard = reinterpret_cast<u8*>(stack_guard_addr);
-  Common::ReadProtectMemory(m_stack_guard, GUARD_SIZE);
-#endif
-}
-
-void JitArm64::UnprotectStack()
-{
-#ifndef _WIN32
-  if (m_stack_guard)
-    Common::UnWriteProtectMemory(m_stack_guard, GUARD_SIZE);
-  m_stack_guard = nullptr;
-#endif
-}
-
 void JitArm64::IntializeSpeculativeConstants()
 {
   // If the block depends on an input register which looks like a gather pipe or MMIO related
@@ -773,15 +686,7 @@ void JitArm64::Jit(u32 em_address)
 
 void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
 {
-  if (m_cleanup_after_stackfault)
-  {
-    ClearCache();
-    m_cleanup_after_stackfault = false;
-#ifdef _WIN32
-    // The stack is in an invalid state with no guard page, reset it.
-    _resetstkoflw();
-#endif
-  }
+  CleanUpAfterStackFault();
 
   if (SConfig::GetInstance().bJITNoBlockCache)
     ClearCache();
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index ca8fd80a30..629cdafede 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -32,7 +32,6 @@ public:
   bool IsInCodeSpace(const u8* ptr) const { return IsInSpace(ptr); }
   bool HandleFault(uintptr_t access_address, SContext* ctx) override;
   void DoBacktrace(uintptr_t access_address, SContext* ctx);
-  bool HandleStackFault() override;
   bool HandleFastmemFault(SContext* ctx);
 
   void ClearCache() override;
@@ -288,8 +287,6 @@ protected:
   void DoDownCount();
   void Cleanup();
   void ResetStack();
-  void ProtectStack();
-  void UnprotectStack();
 
   void ResetFreeMemoryRanges();
 
@@ -363,10 +360,6 @@ protected:
   u8* m_near_code_end = nullptr;
   bool m_near_code_write_failed = false;
 
-  bool m_enable_blr_optimization = false;
-  bool m_cleanup_after_stackfault = false;
-  u8* m_stack_guard = nullptr;
-
   HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near;
   HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far;
 };
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp
index 7b450f5d12..9800123075 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp
@@ -3,15 +3,53 @@
 
 #include "Core/PowerPC/JitCommon/JitBase.h"
 
+#include "Common/Align.h"
 #include "Common/CommonTypes.h"
+#include "Common/MemoryUtil.h"
+#include "Common/Thread.h"
 #include "Core/Config/MainSettings.h"
 #include "Core/ConfigManager.h"
 #include "Core/Core.h"
+#include "Core/CoreTiming.h"
 #include "Core/HW/CPU.h"
 #include "Core/PowerPC/PPCAnalyst.h"
 #include "Core/PowerPC/PowerPC.h"
 #include "Core/System.h"
 
+#ifdef _WIN32
+#include <windows.h>
+#include <processthreadsapi.h>
+#else
+#include <unistd.h>
+#endif
+
+// The BLR optimization is nice, but it means that JITted code can overflow the
+// native stack by repeatedly running BL. (The chance of this happening in any
+// retail game is close to 0, but correctness is correctness...) Also, the
+// overflow might not happen directly in the JITted code but in a C++ function
+// called from it, so we can't just adjust RSP in the case of a fault.
+// Instead, we have to have extra stack space preallocated under the fault
+// point which allows the code to continue, after wiping the JIT cache so we
+// can reset things at a safe point. Once this condition trips, the
+// optimization is permanently disabled, under the assumption this will never
+// happen in practice.
+
+// On Unix, we just mark an appropriate region of the stack as PROT_NONE and
+// handle it the same way as fastmem faults. It's safe to take a fault with a
+// bad RSP, because on Linux we can use sigaltstack and on OS X we're already
+// on a separate thread.
+
+// Windows is... under-documented.
+// It already puts guard pages so it can automatically grow the stack and it
+// doesn't look like there is a way to hook into a guard page fault and implement
+// our own logic.
+// But when windows reaches the last guard page, it raises a "Stack Overflow"
+// exception which we can hook into, however by default it leaves you with less
+// than 4kb of stack. So we use SetThreadStackGuarantee to trigger the Stack
+// Overflow early while we still have 256kb of stack remaining.
+// After resetting the stack to the top, we call _resetstkoflw() to restore
+// the guard page at the 256kb mark.
+
 const u8* JitBase::Dispatch(JitBase& jit)
 {
   return jit.GetBlockCache()->Dispatch();
@@ -72,6 +110,107 @@ void JitBase::RefreshConfig()
   analyzer.SetDivByZeroExceptionsEnabled(m_enable_div_by_zero_exceptions);
 }
 
+void JitBase::InitBLROptimization()
+{
+  m_enable_blr_optimization = jo.enableBlocklink && m_fastmem_enabled && !m_enable_debugging;
+  m_cleanup_after_stackfault = false;
+}
+
+void JitBase::ProtectStack()
+{
+  if (!m_enable_blr_optimization)
+    return;
+
+#ifdef _WIN32
+  ULONG reserveSize = SAFE_STACK_SIZE;
+  SetThreadStackGuarantee(&reserveSize);
+#else
+  auto [stack_addr, stack_size] = Common::GetCurrentThreadStack();
+
+  const uintptr_t stack_base_addr = reinterpret_cast<uintptr_t>(stack_addr);
+  const uintptr_t stack_middle_addr = reinterpret_cast<uintptr_t>(&stack_addr);
+  if (stack_middle_addr < stack_base_addr || stack_middle_addr >= stack_base_addr + stack_size)
+  {
+    PanicAlertFmt("Failed to get correct stack base");
+    m_enable_blr_optimization = false;
+    return;
+  }
+
+  const long page_size = sysconf(_SC_PAGESIZE);
+  if (page_size <= 0)
+  {
+    PanicAlertFmt("Failed to get page size");
+    m_enable_blr_optimization = false;
+    return;
+  }
+
+  const uintptr_t stack_guard_addr = Common::AlignUp(stack_base_addr + GUARD_OFFSET, page_size);
+  if (stack_guard_addr >= stack_middle_addr ||
+      stack_middle_addr - stack_guard_addr < GUARD_SIZE + MIN_UNSAFE_STACK_SIZE)
+  {
+    PanicAlertFmt("Stack is too small for BLR optimization (size {:x}, base {:x}, current stack "
+                  "pointer {:x}, alignment {:x})",
+                  stack_size, stack_base_addr, stack_middle_addr, page_size);
+    m_enable_blr_optimization = false;
+    return;
+  }
+
+  m_stack_guard = reinterpret_cast<u8*>(stack_guard_addr);
+  Common::ReadProtectMemory(m_stack_guard, GUARD_SIZE);
+#endif
+}
+
+void JitBase::UnprotectStack()
+{
+#ifndef _WIN32
+  if (m_stack_guard)
+  {
+    Common::UnWriteProtectMemory(m_stack_guard, GUARD_SIZE);
+    m_stack_guard = nullptr;
+  }
+#endif
+}
+
+bool JitBase::HandleStackFault()
+{
+  // It's possible the stack fault might have been caused by something other than
+  // the BLR optimization. If the fault was triggered from another thread, or
+  // when BLR optimization isn't enabled then there is nothing we can do about the fault.
+  // Return false so the regular stack overflow handler can trigger (which crashes)
+  if (!m_enable_blr_optimization || !Core::IsCPUThread())
+    return false;
+
+  WARN_LOG_FMT(POWERPC, "BLR cache disabled due to excessive BL in the emulated program.");
+
+  UnprotectStack();
+  m_enable_blr_optimization = false;
+
+  // We're going to need to clear the whole cache to get rid of the bad
+  // CALLs, but we can't yet. Fake the downcount so we're forced to the
+  // dispatcher (no block linking), and clear the cache so we're sent to
+  // Jit. In the case of Windows, we will also need to call _resetstkoflw()
+  // to reset the guard page.
+  // Yeah, it's kind of gross.
+  GetBlockCache()->InvalidateICache(0, 0xffffffff, true);
+  Core::System::GetInstance().GetCoreTiming().ForceExceptionCheck(0);
+  m_cleanup_after_stackfault = true;
+
+  return true;
+}
+
+void JitBase::CleanUpAfterStackFault()
+{
+  if (m_cleanup_after_stackfault)
+  {
+    ClearCache();
+    m_cleanup_after_stackfault = false;
+#ifdef _WIN32
+    // The stack is in an invalid state with no guard page, reset it.
+    _resetstkoflw();
+#endif
+  }
+}
+
 bool JitBase::CanMergeNextInstructions(int count) const
 {
   if (CPU::IsStepping() || js.instructionsLeft < count)
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index ad218ed8a3..bebb0aa7dd 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -54,6 +54,12 @@ protected:
 #endif
   };
 
+  static constexpr size_t SAFE_STACK_SIZE = 256 * 1024;
+  static constexpr size_t MIN_UNSAFE_STACK_SIZE = 192 * 1024;
+  static constexpr size_t MIN_STACK_SIZE = SAFE_STACK_SIZE + MIN_UNSAFE_STACK_SIZE;
+  static constexpr size_t GUARD_SIZE = 64 * 1024;
+  static constexpr size_t GUARD_OFFSET = SAFE_STACK_SIZE - GUARD_SIZE;
+
   struct JitOptions
   {
     bool enableBlocklink;
@@ -138,8 +144,17 @@ protected:
   bool m_pause_on_panic_enabled = false;
   bool m_accurate_cpu_cache_enabled = false;
 
+  bool m_enable_blr_optimization = false;
+  bool m_cleanup_after_stackfault = false;
+  u8* m_stack_guard = nullptr;
+
   void RefreshConfig();
 
+  void InitBLROptimization();
+  void ProtectStack();
+  void UnprotectStack();
+  void CleanUpAfterStackFault();
+
   bool CanMergeNextInstructions(int count) const;
 
   void UpdateMemoryAndExceptionOptions();
@@ -160,7 +175,7 @@ public:
   virtual const CommonAsmRoutinesBase* GetAsmRoutines() = 0;
 
   virtual bool HandleFault(uintptr_t access_address, SContext* ctx) = 0;
-  virtual bool HandleStackFault() { return false; }
+  bool HandleStackFault();
 
   static constexpr std::size_t code_buffer_size = 32000;