From 52fe05af6bbf4cc32eff40fa7f7f8c035bef349a Mon Sep 17 00:00:00 2001 From: aldelaro5 Date: Fri, 24 Feb 2017 21:10:22 -0500 Subject: [PATCH 1/2] Make memory breakpoint faster Currently, slowmem is used at any time that memory breakpoints are in use. This commit makes it so that whenever the DBAT gets updated, if the address overlaps any memchecks, it forces the use of slowmem. This keeps fastmem enabled in all other cases and noticeably increases performance when using memory breakpoints. --- Source/Core/Core/PowerPC/BreakPoints.cpp | 22 ++++++++++++++----- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 4 ++-- .../Core/Core/PowerPC/JitCommon/JitBase.cpp | 3 +-- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 1 - .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 13 ----------- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 1 - Source/Core/Core/PowerPC/MMU.cpp | 20 +++++++++++++++++ 7 files changed, 39 insertions(+), 25 deletions(-) diff --git a/Source/Core/Core/PowerPC/BreakPoints.cpp b/Source/Core/Core/PowerPC/BreakPoints.cpp index 4ef86f95a7..6bca42b4b3 100644 --- a/Source/Core/Core/PowerPC/BreakPoints.cpp +++ b/Source/Core/Core/PowerPC/BreakPoints.cpp @@ -11,8 +11,10 @@ #include "Common/CommonTypes.h" #include "Common/DebugInterface.h" +#include "Core/Core.h" #include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitCache.h" +#include "Core/PowerPC/PowerPC.h" bool BreakPoints::IsAddressBreakPoint(u32 address) const { @@ -168,13 +170,18 @@ void MemChecks::AddFromStrings(const TMemChecksStr& mc_strings) void MemChecks::Add(const TMemCheck& memory_check) { - bool had_any = HasAny(); if (GetMemCheck(memory_check.start_address) == nullptr) + { + bool had_any = HasAny(); m_mem_checks.push_back(memory_check); - // If this is the first one, clear the JIT cache so it can switch to - // watchpoint-compatible code. 
- if (!had_any && g_jit) - g_jit->GetBlockCache()->SchedulateClearCacheThreadSafe(); + bool lock = Core::PauseAndLock(true); + // If this is the first one, clear the JIT cache so it can switch to + // watchpoint-compatible code. + if (!had_any && g_jit) + g_jit->ClearCache(); + PowerPC::DBATUpdated(); + Core::PauseAndLock(false, lock); + } } void MemChecks::Remove(u32 address) @@ -184,8 +191,11 @@ void MemChecks::Remove(u32 address) if (i->start_address == address) { m_mem_checks.erase(i); + bool lock = Core::PauseAndLock(true); if (!HasAny() && g_jit) - g_jit->GetBlockCache()->SchedulateClearCacheThreadSafe(); + g_jit->ClearCache(); + PowerPC::DBATUpdated(); + Core::PauseAndLock(false, lock); return; } } diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 73c4cae584..62f78c7790 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -335,7 +335,7 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags) { - bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0 || g_jit->jo.alwaysUseMemFuncs; + bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; registersInUse[reg_value] = false; if (g_jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem) @@ -492,7 +492,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces BitSet32 registersInUse, int flags) { bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); - bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0 || g_jit->jo.alwaysUseMemFuncs; + bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0; // set the correct immediate format reg_value = FixImmediate(accessSize, reg_value); diff --git 
a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index 455acf5cfe..0b047fca97 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -46,7 +46,6 @@ bool JitBase::MergeAllowedNextInstructions(int count) void JitBase::UpdateMemoryOptions() { bool any_watchpoints = PowerPC::memchecks.HasAny(); - jo.fastmem = SConfig::GetInstance().bFastmem && !any_watchpoints; + jo.fastmem = SConfig::GetInstance().bFastmem; jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints; - jo.alwaysUseMemFuncs = any_watchpoints; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index c3c63dad41..f62776d6c3 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -52,7 +52,6 @@ protected: bool accurateSinglePrecision; bool fastmem; bool memcheck; - bool alwaysUseMemFuncs; }; struct JitState { diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 54ce8c27f9..d04b4d22ac 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -29,13 +29,6 @@ using namespace Gen; -static CoreTiming::EventType* s_clear_jit_cache_thread_safe; - -static void ClearCacheThreadSafe(u64 userdata, s64 cyclesdata) -{ - JitInterface::ClearCache(); -} - bool JitBlock::OverlapsPhysicalRange(u32 address, u32 length) const { return physical_addresses.lower_bound(address) != @@ -50,7 +43,6 @@ JitBaseBlockCache::~JitBaseBlockCache() = default; void JitBaseBlockCache::Init() { - s_clear_jit_cache_thread_safe = CoreTiming::RegisterEvent("clearJitCache", ClearCacheThreadSafe); JitRegister::Init(SConfig::GetInstance().m_perfDir); Clear(); @@ -89,11 +81,6 @@ void JitBaseBlockCache::Reset() Init(); } -void JitBaseBlockCache::SchedulateClearCacheThreadSafe() -{ - CoreTiming::ScheduleEvent(0, 
s_clear_jit_cache_thread_safe, 0, CoreTiming::FromThread::NON_CPU); -} - JitBlock** JitBaseBlockCache::GetFastBlockMap() { return fast_block_map.data(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 485be781dd..76e2d4dbf9 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -125,7 +125,6 @@ public: void Shutdown(); void Clear(); void Reset(); - void SchedulateClearCacheThreadSafe(); // Code Cache JitBlock** GetFastBlockMap(); diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 416174a995..992af28ac7 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1132,6 +1132,24 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe return TranslateAddressResult{TranslateAddressResult::PAGE_FAULT, 0}; } +static bool overlaps_memcheck(u32 pageEndAddress) +{ + if (!memchecks.HasAny()) + return false; + u32 page_end_suffix = ((1 << BAT_INDEX_SHIFT)) - 1; + for (TMemCheck memcheck : memchecks.GetMemChecks()) + { + if (((memcheck.start_address | page_end_suffix) == pageEndAddress || + (memcheck.end_address | page_end_suffix) == pageEndAddress) || + ((memcheck.start_address | page_end_suffix) < pageEndAddress && + (memcheck.end_address | page_end_suffix) > pageEndAddress)) + { + return true; + } + } + return false; +} + static void UpdateBATs(BatTable& bat_table, u32 base_spr) { // TODO: Separate BATs for MSR.PR==0 and MSR.PR==1 @@ -1191,6 +1209,8 @@ static void UpdateBATs(BatTable& bat_table, u32 base_spr) valid_bit = 0x3; else if ((address >> 28) == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE))) valid_bit = 0x3; + if (overlaps_memcheck(((batu.BEPI | j) << BAT_INDEX_SHIFT) | ((1 << BAT_INDEX_SHIFT) - 1))) + valid_bit &= ~0x2; // (BEPI | j) == (BEPI & ~BL) | (j & BL). 
bat_table[batu.BEPI | j] = address | valid_bit; From 9ad6c8f334b07f28633734eb8a240a85c48238a9 Mon Sep 17 00:00:00 2001 From: aldelaro5 Date: Tue, 28 Feb 2017 17:32:17 -0500 Subject: [PATCH 2/2] Make memory breakpoint faster Currently, slowmem is used at any time that memory breakpoints are in use. This commit makes it so that whenever the DBAT gets updated, if the address overlaps any memchecks, it forces the use of slowmem. This keeps fastmem enabled in all other cases and noticeably increases performance when using memory breakpoints. --- Source/Core/Core/PowerPC/BreakPoints.cpp | 23 +++++++++++++++++-- Source/Core/Core/PowerPC/BreakPoints.h | 1 + .../Core/Core/PowerPC/JitCommon/JitBase.cpp | 2 +- Source/Core/Core/PowerPC/MMU.cpp | 22 +++--------------- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/Source/Core/Core/PowerPC/BreakPoints.cpp b/Source/Core/Core/PowerPC/BreakPoints.cpp index 6bca42b4b3..8714efda87 100644 --- a/Source/Core/Core/PowerPC/BreakPoints.cpp +++ b/Source/Core/Core/PowerPC/BreakPoints.cpp @@ -173,8 +173,8 @@ void MemChecks::Add(const TMemCheck& memory_check) if (GetMemCheck(memory_check.start_address) == nullptr) { bool had_any = HasAny(); - m_mem_checks.push_back(memory_check); bool lock = Core::PauseAndLock(true); + m_mem_checks.push_back(memory_check); // If this is the first one, clear the JIT cache so it can switch to // watchpoint-compatible code. 
if (!had_any && g_jit) @@ -190,8 +190,8 @@ void MemChecks::Remove(u32 address) { if (i->start_address == address) { - m_mem_checks.erase(i); bool lock = Core::PauseAndLock(true); + m_mem_checks.erase(i); if (!HasAny() && g_jit) g_jit->ClearCache(); PowerPC::DBATUpdated(); @@ -220,6 +220,25 @@ TMemCheck* MemChecks::GetMemCheck(u32 address) return nullptr; } +bool MemChecks::OverlapsMemcheck(u32 address, u32 length) +{ + if (!HasAny()) + return false; + u32 page_end_suffix = length - 1; + u32 page_end_address = address | page_end_suffix; + for (TMemCheck memcheck : m_mem_checks) + { + if (((memcheck.start_address | page_end_suffix) == page_end_address || + (memcheck.end_address | page_end_suffix) == page_end_address) || + ((memcheck.start_address | page_end_suffix) < page_end_address && + (memcheck.end_address | page_end_suffix) > page_end_address)) + { + return true; + } + } + return false; +} + bool TMemCheck::Action(DebugInterface* debug_interface, u32 value, u32 addr, bool write, int size, u32 pc) { diff --git a/Source/Core/Core/PowerPC/BreakPoints.h b/Source/Core/Core/PowerPC/BreakPoints.h index b9481b5d18..cce1756513 100644 --- a/Source/Core/Core/PowerPC/BreakPoints.h +++ b/Source/Core/Core/PowerPC/BreakPoints.h @@ -87,6 +87,7 @@ public: // memory breakpoint TMemCheck* GetMemCheck(u32 address); + bool OverlapsMemcheck(u32 address, u32 length); void Remove(u32 address); void Clear() { m_mem_checks.clear(); } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index 0b047fca97..9b0c837610 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -46,6 +46,6 @@ bool JitBase::MergeAllowedNextInstructions(int count) void JitBase::UpdateMemoryOptions() { bool any_watchpoints = PowerPC::memchecks.HasAny(); - jo.fastmem = SConfig::GetInstance().bFastmem; + jo.fastmem = SConfig::GetInstance().bFastmem && (UReg_MSR(MSR).DR || !any_watchpoints); jo.memcheck = 
SConfig::GetInstance().bMMU || any_watchpoints; } diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 992af28ac7..583d170f43 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1132,24 +1132,6 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe return TranslateAddressResult{TranslateAddressResult::PAGE_FAULT, 0}; } -static bool overlaps_memcheck(u32 pageEndAddress) -{ - if (!memchecks.HasAny()) - return false; - u32 page_end_suffix = ((1 << BAT_INDEX_SHIFT)) - 1; - for (TMemCheck memcheck : memchecks.GetMemChecks()) - { - if (((memcheck.start_address | page_end_suffix) == pageEndAddress || - (memcheck.end_address | page_end_suffix) == pageEndAddress) || - ((memcheck.start_address | page_end_suffix) < pageEndAddress && - (memcheck.end_address | page_end_suffix) > pageEndAddress)) - { - return true; - } - } - return false; -} - static void UpdateBATs(BatTable& bat_table, u32 base_spr) { // TODO: Separate BATs for MSR.PR==0 and MSR.PR==1 @@ -1209,7 +1191,9 @@ static void UpdateBATs(BatTable& bat_table, u32 base_spr) valid_bit = 0x3; else if ((address >> 28) == 0xE && (address < (0xE0000000 + Memory::L1_CACHE_SIZE))) valid_bit = 0x3; - if (overlaps_memcheck(((batu.BEPI | j) << BAT_INDEX_SHIFT) | ((1 << BAT_INDEX_SHIFT) - 1))) + // Fastmem doesn't support memchecks, so disable it for all overlapping virtual pages. + if (PowerPC::memchecks.OverlapsMemcheck(((batu.BEPI | j) << BAT_INDEX_SHIFT), + 1 << BAT_INDEX_SHIFT)) valid_bit &= ~0x2; // (BEPI | j) == (BEPI & ~BL) | (j & BL).