From dc0fbc15f057d50dd6adf3815bfad604f8fb925d Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 22 Jan 2017 11:57:51 +0100 Subject: [PATCH 1/5] JitCache: Drop block_map. It is only used for invalidation, and in a bad way. Just scan over all elements, as it is still in O(n), this shouldn't matter much. --- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 53 +++++++++---------- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 16 +++--- 2 files changed, 31 insertions(+), 38 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index c2ac9fd07a..1a8d1c6df9 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -36,6 +36,15 @@ static void ClearCacheThreadSafe(u64 userdata, s64 cyclesdata) JitInterface::ClearCache(); } +bool JitBlock::Overlap(u32 addr, u32 length) +{ + if (addr >= physicalAddress + originalSize) + return false; + if (physicalAddress >= addr + length) + return false; + return true; +} + JitBaseBlockCache::JitBaseBlockCache(JitBase& jit) : m_jit{jit} { } @@ -64,13 +73,12 @@ void JitBaseBlockCache::Clear() #endif m_jit.js.fifoWriteAddresses.clear(); m_jit.js.pairedQuantizeAddresses.clear(); - for (auto& e : start_block_map) + for (auto& e : block_map) { DestroyBlock(e.second); } - start_block_map.clear(); - links_to.clear(); block_map.clear(); + links_to.clear(); valid_block.ClearAll(); @@ -95,14 +103,14 @@ JitBlock** JitBaseBlockCache::GetFastBlockMap() void JitBaseBlockCache::RunOnBlocks(std::function f) { - for (const auto& e : start_block_map) + for (const auto& e : block_map) f(e.second); } JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) { u32 physicalAddress = PowerPC::JitCache_TranslateAddress(em_address).address; - JitBlock& b = start_block_map.emplace(physicalAddress, JitBlock())->second; + JitBlock& b = block_map.emplace(physicalAddress, JitBlock())->second; b.effectiveAddress = em_address; b.physicalAddress 
= physicalAddress; b.msrBits = MSR & JIT_CACHE_MSR_MASK; @@ -111,18 +119,6 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) return &b; } -void JitBaseBlockCache::FreeBlock(JitBlock* block) -{ - auto iter = start_block_map.equal_range(block->physicalAddress); - while (iter.first != iter.second) - { - if (&iter.first->second == block) - iter.first = start_block_map.erase(iter.first); - else - iter.first++; - } -} - void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr) { size_t index = FastLookupIndexForAddress(block.effectiveAddress); @@ -134,8 +130,6 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8 for (u32 addr = pAddr / 32; addr <= (pAddr + (block.originalSize - 1) * 4) / 32; ++addr) valid_block.Set(addr); - block_map.emplace(std::make_pair(pAddr + 4 * block.originalSize - 1, pAddr), &block); - if (block_link) { for (const auto& e : block.linkData) @@ -162,7 +156,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) translated_addr = translated.address; } - auto iter = start_block_map.equal_range(translated_addr); + auto iter = block_map.equal_range(translated_addr); for (; iter.first != iter.second; iter.first++) { JitBlock& b = iter.first->second; @@ -204,17 +198,20 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for } // destroy JIT blocks - // !! 
this works correctly under assumption that any two overlapping blocks end at the same - // address if (destroy_block) { - auto it = block_map.lower_bound(std::make_pair(pAddr, 0)); - while (it != block_map.end() && it->first.second < pAddr + length) + auto iter = block_map.begin(); + while (iter != block_map.end()) { - JitBlock* block = it->second; - DestroyBlock(*block); - FreeBlock(block); - it = block_map.erase(it); + if (iter->second.Overlap(pAddr, length)) + { + DestroyBlock(iter->second); + iter = block_map.erase(iter); + } + else + { + iter++; + } } // If the code was actually modified, we need to clear the relevant entries from the diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index c5aaa8d1e8..28111437fb 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -24,6 +24,8 @@ class JitBase; // address. struct JitBlock { + bool Overlap(u32 addr, u32 length); + // A special entry point for block linking; usually used to check the // downcount. const u8* checkedEntry; @@ -35,8 +37,8 @@ struct JitBlock // The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK. u32 msrBits; // The physical address of the code represented by this block. - // Various maps in the cache are indexed by this (start_block_map, - // block_map, and valid_block in particular). This is useful because of + // Various maps in the cache are indexed by this (block_map + // and valid_block in particular). This is useful because of // of the way the instruction cache works on PowerPC. u32 physicalAddress; // The number of bytes of JIT'ed code contained in this block. Mostly @@ -124,7 +126,6 @@ public: void RunOnBlocks(std::function f); JitBlock* AllocateBlock(u32 em_address); - void FreeBlock(JitBlock* block); void FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr); // Look for the block in the slow but accurate way. 
@@ -163,20 +164,15 @@ private: // It is used to query all blocks which links to an address. std::multimap links_to; // destination_PC -> number - // Map indexed by the physical memory location. - // It is used to invalidate blocks based on memory location. - std::multimap, JitBlock*> block_map; // (end_addr, start_addr) -> block - // Map indexed by the physical address of the entry point. // This is used to query the block based on the current PC in a slow way. - // TODO: This is redundant with block_map. - std::multimap start_block_map; // start_addr -> block + std::multimap block_map; // start_addr -> block // This bitsets shows which cachelines overlap with any blocks. // It is used to provide a fast way to query if no icache invalidation is needed. ValidBlockBitSet valid_block; // This array is indexed with the masked PC and likely holds the correct block id. - // This is used as a fast cache of start_block_map used in the assembly dispatcher. + // This is used as a fast cache of block_map used in the assembly dispatcher. std::array fast_block_map; // start_addr & mask -> number }; From f3ed993747f45ea2fbfa59bf72bc67cad22a2f76 Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 22 Jan 2017 12:58:57 +0100 Subject: [PATCH 2/5] JitCache: Use a map with macro blocks for the occupied memory regions. This also allows fast invalidation, without any restrictions on the blocks themselves. So we can now implement inlining.
--- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 51 +++++++++++++++---- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 7 +++ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 1a8d1c6df9..a3360a6dca 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -79,6 +79,7 @@ void JitBaseBlockCache::Clear() } block_map.clear(); links_to.clear(); + block_range_map.clear(); valid_block.ClearAll(); @@ -125,11 +126,16 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8 fast_block_map[index] = █ block.fast_block_map_index = index; - u32 pAddr = block.physicalAddress; + u32 block_start = block.physicalAddress; + u32 block_end = block_start + (block.originalSize - 1) * 4; - for (u32 addr = pAddr / 32; addr <= (pAddr + (block.originalSize - 1) * 4) / 32; ++addr) + for (u32 addr = block_start / 32; addr <= block_end / 32; ++addr) valid_block.Set(addr); + u32 mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + for (u32 addr = block_start & mask; addr <= (block_end & mask); addr += BLOCK_RANGE_MAP_ELEMENTS) + block_range_map[addr].insert(&block); + if (block_link) { for (const auto& e : block.linkData) @@ -200,18 +206,43 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for // destroy JIT blocks if (destroy_block) { - auto iter = block_map.begin(); - while (iter != block_map.end()) + // Iterate over all macro blocks which overlap the given range. + u32 mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + auto start = block_range_map.lower_bound(pAddr & mask); + auto end = block_range_map.lower_bound(pAddr + length); + while (start != end) { - if (iter->second.Overlap(pAddr, length)) + // Iterate over all blocks in the macro block. 
+ auto iter = start->second.begin(); + while (iter != start->second.end()) { - DestroyBlock(iter->second); - iter = block_map.erase(iter); + JitBlock* block = *iter; + if (block->Overlap(pAddr, length)) + { + // If the block overlaps, also remove all other occupied slots in the other macro blocks. + // This will leak empty macro blocks, but they may be reused or cleared later on. + u32 block_start = block->physicalAddress; + u32 block_end = block_start + (block->originalSize - 1) * 4; + for (u32 addr = block_start & mask; addr <= (block_end & mask); addr += BLOCK_RANGE_MAP_ELEMENTS) + if (addr != start->first) + block_range_map[addr].erase(block); + + // And remove the block. + DestroyBlock(*block); + block_map.erase(block->physicalAddress); + iter = start->second.erase(iter); + } + else + { + iter++; + } } + + // If the macro block is empty, drop it. + if (start->second.empty()) + start = block_range_map.erase(start); else - { - iter++; - } + start++; } // If the code was actually modified, we need to clear the relevant entries from the diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 28111437fb..1baab02444 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "Common/CommonTypes.h" @@ -168,6 +169,12 @@ private: // This is used to query the block based on the current PC in a slow way. std::multimap block_map; // start_addr -> block + // Range of overlapping code indexed by a masked physical address. + // This is used for invalidation of memory regions. The range is grouped + // in macro blocks of each 0x100 bytes. + static constexpr u32 BLOCK_RANGE_MAP_ELEMENTS = 0x100; + std::map> block_range_map; + // This bitsets shows which cachelines overlap with any blocks. // It is used to provide a fast way to query if no icache invalidation is needed. 
ValidBlockBitSet valid_block; From 3529af61c40dec93c3ee47e7aaffdb311d75a43f Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 22 Jan 2017 14:35:03 +0100 Subject: [PATCH 3/5] JitAnalyzer: Remove jit block cache workaround. --- Source/Core/Core/PowerPC/PPCAnalyst.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index 11db99c2e6..7b44d0f9a7 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -653,7 +653,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32 u32 return_address = 0; u32 numFollows = 0; u32 num_inst = 0; - bool prev_inst_from_bat = true; for (u32 i = 0; i < blockSize; ++i) { @@ -666,16 +665,6 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32 } UGeckoInstruction inst = result.hex; - // Slight hack: the JIT block cache currently assumes all blocks end at the same place, - // but broken blocks due to page faults break this assumption. Avoid this by just ending - // all virtual memory instruction blocks at page boundaries. - // FIXME: improve the JIT block cache so we don't need to do this. - if ((!result.from_bat || !prev_inst_from_bat) && i > 0 && (address & 0xfff) == 0) - { - break; - } - prev_inst_from_bat = result.from_bat; - num_inst++; memset(&code[i], 0, sizeof(CodeOp)); GekkoOPInfo* opinfo = GetOpInfo(inst); From 70caf447b9245fa966b066af1e5424887dd5b3b5 Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 22 Jan 2017 16:23:56 +0100 Subject: [PATCH 4/5] JitCache: Get physical addresses from PPCAnalyst. So we support all kinds of degenerate blocks now, not just range+length based ones.
--- .../CachedInterpreter/CachedInterpreter.cpp | 2 +- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 3 +- Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp | 3 +- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 2 +- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 36 ++++++++----------- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 5 ++- Source/Core/Core/PowerPC/MMU.cpp | 4 +-- Source/Core/Core/PowerPC/PPCAnalyst.cpp | 2 ++ Source/Core/Core/PowerPC/PPCAnalyst.h | 4 +++ Source/Core/Core/PowerPC/PowerPC.h | 1 + 10 files changed, 34 insertions(+), 28 deletions(-) diff --git a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp index 1e41dc63c2..ed4c13e2c6 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter/CachedInterpreter.cpp @@ -211,7 +211,7 @@ void CachedInterpreter::Jit(u32 address) b->codeSize = (u32)(GetCodePtr() - b->checkedEntry); b->originalSize = code_block.m_num_instructions; - m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, b->checkedEntry); + m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); } void CachedInterpreter::ClearCache() diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index f7b75223e2..568bfeca55 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -590,7 +590,8 @@ void Jit64::Jit(u32 em_address) } JitBlock* b = blocks.AllocateBlock(em_address); - blocks.FinalizeBlock(*b, jo.enableBlocklink, DoJit(em_address, &code_buffer, b, nextPC)); + DoJit(em_address, &code_buffer, b, nextPC); + blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); } const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC) diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 
dac66a2e4b..9674a2e6fe 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -508,7 +508,8 @@ void JitIL::Jit(u32 em_address) } JitBlock* b = blocks.AllocateBlock(em_address); - blocks.FinalizeBlock(*b, jo.enableBlocklink, DoJit(em_address, &code_buffer, b, nextPC)); + DoJit(em_address, &code_buffer, b, nextPC); + blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); } const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 1dfdabe0c7..7ccdb00614 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -399,7 +399,7 @@ void JitArm64::Jit(u32) JitBlock* b = blocks.AllocateBlock(em_address); const u8* BlockPtr = DoJit(em_address, &code_buffer, b, nextPC); - blocks.FinalizeBlock(*b, jo.enableBlocklink, BlockPtr); + blocks.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses); } const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock* b, u32 nextPC) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index a3360a6dca..802757d5ca 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -38,11 +38,7 @@ static void ClearCacheThreadSafe(u64 userdata, s64 cyclesdata) bool JitBlock::Overlap(u32 addr, u32 length) { - if (addr >= physicalAddress + originalSize) - return false; - if (physicalAddress >= addr + length) - return false; - return true; + return physical_addresses.lower_bound(addr) != physical_addresses.lower_bound(addr + length); } JitBaseBlockCache::JitBaseBlockCache(JitBase& jit) : m_jit{jit} @@ -120,21 +116,21 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) return &b; } -void 
JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr) +void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, + const std::set& physical_addresses) { size_t index = FastLookupIndexForAddress(block.effectiveAddress); fast_block_map[index] = █ block.fast_block_map_index = index; - u32 block_start = block.physicalAddress; - u32 block_end = block_start + (block.originalSize - 1) * 4; + block.physical_addresses = physical_addresses; - for (u32 addr = block_start / 32; addr <= block_end / 32; ++addr) - valid_block.Set(addr); - - u32 mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); - for (u32 addr = block_start & mask; addr <= (block_end & mask); addr += BLOCK_RANGE_MAP_ELEMENTS) - block_range_map[addr].insert(&block); + u32 range_mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + for (u32 addr : physical_addresses) + { + valid_block.Set(addr / 32); + block_range_map[addr & range_mask].insert(&block); + } if (block_link) { @@ -207,8 +203,8 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for if (destroy_block) { // Iterate over all macro blocks which overlap the given range. - u32 mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); - auto start = block_range_map.lower_bound(pAddr & mask); + u32 range_mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + auto start = block_range_map.lower_bound(pAddr & range_mask); auto end = block_range_map.lower_bound(pAddr + length); while (start != end) { @@ -221,11 +217,9 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for { // If the block overlaps, also remove all other occupied slots in the other macro blocks. // This will leak empty macro blocks, but they may be reused or cleared later on. 
- u32 block_start = block->physicalAddress; - u32 block_end = block_start + (block->originalSize - 1) * 4; - for (u32 addr = block_start & mask; addr <= (block_end & mask); addr += BLOCK_RANGE_MAP_ELEMENTS) - if (addr != start->first) - block_range_map[addr].erase(block); + for (u32 addr : block->physical_addresses) + if ((addr & range_mask) != start->first) + block_range_map[addr & range_mask].erase(block); // And remove the block. DestroyBlock(*block); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 1baab02444..f9e29fbd20 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -60,6 +60,9 @@ struct JitBlock }; std::vector linkData; + // This set stores all physical addresses of all occupied instructions. + std::set physical_addresses; + // we don't really need to save start and stop // TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;) u64 ticStart; // for profiling - time. @@ -127,7 +130,7 @@ public: void RunOnBlocks(std::function f); JitBlock* AllocateBlock(u32 em_address); - void FinalizeBlock(JitBlock& block, bool block_link, const u8* code_ptr); + void FinalizeBlock(JitBlock& block, bool block_link, const std::set& physical_addresses); // Look for the block in the slow but accurate way. // This function shall be used if FastLookupIndexForAddress() failed. 
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 3d989412c3..2fbb94fd85 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -384,7 +384,7 @@ TryReadInstResult TryReadInstruction(u32 address) auto tlb_addr = TranslateAddress(address); if (!tlb_addr.Success()) { - return TryReadInstResult{false, false, 0}; + return TryReadInstResult{false, false, 0, 0}; } else { @@ -403,7 +403,7 @@ TryReadInstResult TryReadInstruction(u32 address) { hex = PowerPC::ppcState.iCache.ReadInstruction(address); } - return TryReadInstResult{true, from_bat, hex}; + return TryReadInstResult{true, from_bat, hex, address}; } u32 HostRead_Instruction(const u32 address) diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index 7b44d0f9a7..6b1942559c 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -646,6 +646,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32 block->m_memory_exception = false; block->m_num_instructions = 0; block->m_gqr_used = BitSet8(0); + block->m_physical_addresses.clear(); CodeOp* code = buffer->codebuffer; @@ -676,6 +677,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, u32 code[i].branchToIndex = -1; code[i].skip = false; block->m_stats->numCycles += opinfo->numCycles; + block->m_physical_addresses.insert(result.physical_address); SetInstructionStats(block, &code[i], opinfo, i); diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h index 42625757b4..02ebc42c18 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/PowerPC/PPCAnalyst.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -157,6 +158,9 @@ struct CodeBlock // Which GPRs this block reads from before defining, if any. BitSet32 m_gpr_inputs; + + // Which memory locations are occupied by this block. 
+ std::set m_physical_addresses; }; class PPCAnalyzer diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 2d8476e0c2..7bac1d97a2 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -232,6 +232,7 @@ struct TryReadInstResult bool valid; bool from_bat; u32 hex; + u32 physical_address; }; TryReadInstResult TryReadInstruction(const u32 address); From 7f6b8e35558c9d671d3be73f71d596d380b0ded2 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 23 Jan 2017 20:32:27 +0100 Subject: [PATCH 5/5] JitCache: Extract ErasePhysicalRange as function. --- .../Core/Core/PowerPC/JitCommon/JitCache.cpp | 86 ++++++++++--------- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 5 +- 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 802757d5ca..f756328804 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -36,9 +36,10 @@ static void ClearCacheThreadSafe(u64 userdata, s64 cyclesdata) JitInterface::ClearCache(); } -bool JitBlock::Overlap(u32 addr, u32 length) +bool JitBlock::OverlapsPhysicalRange(u32 address, u32 length) const { - return physical_addresses.lower_bound(addr) != physical_addresses.lower_bound(addr + length); + return physical_addresses.lower_bound(address) != + physical_addresses.lower_bound(address + length); } JitBaseBlockCache::JitBaseBlockCache(JitBase& jit) : m_jit{jit} @@ -182,7 +183,7 @@ const u8* JitBaseBlockCache::Dispatch() return block->normalEntry; } -void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool forced) +void JitBaseBlockCache::InvalidateICache(u32 address, u32 length, bool forced) { auto translated = PowerPC::JitCache_TranslateAddress(address); if (!translated.valid) @@ -199,45 +200,10 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for 
valid_block.Clear(pAddr / 32); } - // destroy JIT blocks if (destroy_block) { - // Iterate over all macro blocks which overlap the given range. - u32 range_mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); - auto start = block_range_map.lower_bound(pAddr & range_mask); - auto end = block_range_map.lower_bound(pAddr + length); - while (start != end) - { - // Iterate over all blocks in the macro block. - auto iter = start->second.begin(); - while (iter != start->second.end()) - { - JitBlock* block = *iter; - if (block->Overlap(pAddr, length)) - { - // If the block overlaps, also remove all other occupied slots in the other macro blocks. - // This will leak empty macro blocks, but they may be reused or cleared later on. - for (u32 addr : block->physical_addresses) - if ((addr & range_mask) != start->first) - block_range_map[addr & range_mask].erase(block); - - // And remove the block. - DestroyBlock(*block); - block_map.erase(block->physicalAddress); - iter = start->second.erase(iter); - } - else - { - iter++; - } - } - - // If the macro block is empty, drop it. - if (start->second.empty()) - start = block_range_map.erase(start); - else - start++; - } + // destroy JIT blocks + ErasePhysicalRange(pAddr, length); // If the code was actually modified, we need to clear the relevant entries from the // FIFO write address cache, so we don't end up with FIFO checks in places they shouldn't @@ -254,6 +220,46 @@ void JitBaseBlockCache::InvalidateICache(u32 address, const u32 length, bool for } } +void JitBaseBlockCache::ErasePhysicalRange(u32 address, u32 length) +{ + // Iterate over all macro blocks which overlap the given range. + u32 range_mask = ~(BLOCK_RANGE_MAP_ELEMENTS - 1); + auto start = block_range_map.lower_bound(address & range_mask); + auto end = block_range_map.lower_bound(address + length); + while (start != end) + { + // Iterate over all blocks in the macro block. 
+ auto iter = start->second.begin(); + while (iter != start->second.end()) + { + JitBlock* block = *iter; + if (block->OverlapsPhysicalRange(address, length)) + { + // If the block overlaps, also remove all other occupied slots in the other macro blocks. + // This will leak empty macro blocks, but they may be reused or cleared later on. + for (u32 addr : block->physical_addresses) + if ((addr & range_mask) != start->first) + block_range_map[addr & range_mask].erase(block); + + // And remove the block. + DestroyBlock(*block); + block_map.erase(block->physicalAddress); + iter = start->second.erase(iter); + } + else + { + iter++; + } + } + + // If the macro block is empty, drop it. + if (start->second.empty()) + start = block_range_map.erase(start); + else + start++; + } +} + u32* JitBaseBlockCache::GetBlockBitSet() const { return valid_block.m_valid_block.get(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index f9e29fbd20..85ae3bfbb8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -25,7 +25,7 @@ class JitBase; // address. struct JitBlock { - bool Overlap(u32 addr, u32 length); + bool OverlapsPhysicalRange(u32 address, u32 length) const; // A special entry point for block linking; usually used to check the // downcount. @@ -143,7 +143,8 @@ public: // assembly version.) const u8* Dispatch(); - void InvalidateICache(u32 address, const u32 length, bool forced); + void InvalidateICache(u32 address, u32 length, bool forced); + void ErasePhysicalRange(u32 address, u32 length); u32* GetBlockBitSet() const;