From 9c90b31e6aadf5972f858e89e523a895e28ae5f6 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sat, 14 Jan 2023 11:46:37 +0100
Subject: [PATCH 1/2] Memmap: Remove some remnants from the 32-bit JITs

---
 Source/Core/Core/HW/Memmap.cpp | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp
index 9edd303db3..df80f2e62e 100644
--- a/Source/Core/Core/HW/Memmap.cpp
+++ b/Source/Core/Core/HW/Memmap.cpp
@@ -104,13 +104,8 @@ void MemoryManager::Init()
   const bool wii = SConfig::GetInstance().bWii;
   const bool mmu = Core::System::GetInstance().IsMMUMode();
 
-  bool fake_vmem = false;
-#ifndef _ARCH_32
   // If MMU is turned off in GameCube mode, turn on fake VMEM hack.
-  // The fake VMEM hack's address space is above the memory space that we
-  // allocate on 32bit targets, so disable it there.
-  fake_vmem = !wii && !mmu;
-#endif
+  const bool fake_vmem = !wii && !mmu;
 
   u32 mem_size = 0;
   for (PhysicalMemoryRegion& region : m_physical_regions)
@@ -164,11 +159,7 @@ void MemoryManager::Init()
 
 bool MemoryManager::InitFastmemArena()
 {
-#if _ARCH_32
-  const size_t memory_size = 0x31000000;
-#else
-  const size_t memory_size = 0x400000000;
-#endif
+  constexpr size_t memory_size = 0x400000000;
   m_physical_base = m_arena.ReserveMemoryRegion(memory_size);
 
   if (!m_physical_base)
@@ -194,9 +185,7 @@ bool MemoryManager::InitFastmemArena()
     }
   }
 
-#ifndef _ARCH_32
   m_logical_base = m_physical_base + 0x200000000;
-#endif
 
   m_is_fastmem_arena_initialized = true;
   return true;

From 4fa9fa997e2a99999a868ad3f504beece5857142 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sat, 14 Jan 2023 12:26:58 +0100
Subject: [PATCH 2/2] Core: Allocate 2 GiB of guard pages below fastmem area

See the comment added by this commit. We were previously guarding
against overshooting in address calculations, but not against
undershooting. Perhaps someone assumed that the displacement of an x86
load/store was treated as unsigned?

Note: While the comment says we can undershoot by up to 2 GiB, in
practice Jit64 as it currently behaves won't actually undershoot by
more than 0x8000, if my analysis is correct. But address space is
cheap, so let's guard the full 2 GiB.
---
 Source/Core/Core/HW/Memmap.cpp | 44 ++++++++++++++++++++++++++++++----
 Source/Core/Core/HW/Memmap.h   |  6 ++---
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp
index df80f2e62e..a9b616751b 100644
--- a/Source/Core/Core/HW/Memmap.cpp
+++ b/Source/Core/Core/HW/Memmap.cpp
@@ -159,15 +159,51 @@ void MemoryManager::Init()
 
 bool MemoryManager::InitFastmemArena()
 {
-  constexpr size_t memory_size = 0x400000000;
-  m_physical_base = m_arena.ReserveMemoryRegion(memory_size);
+  // Here we set up memory mappings for fastmem. The basic idea of fastmem is that we reserve 4 GiB
+  // of virtual memory and lay out the addresses within that 4 GiB range just like the memory map of
+  // the emulated system. This lets the JIT emulate PPC load/store instructions by translating a PPC
+  // address to a host address as follows and then using a regular load/store instruction:
+  //
+  // RMEM = ppcState.msr.DR ? m_logical_base : m_physical_base
+  // host_address = RMEM + u32(ppc_address_base + ppc_address_offset)
+  //
+  // If the resulting host address is backed by real memory, the memory access will simply work.
+  // If not, a segfault handler will backpatch the JIT code to instead call functions in MMU.cpp.
+  // This way, most memory accesses will be super fast. We do pay a performance penalty for memory
+  // accesses that need special handling, but they're rare enough that it's very beneficial overall.
+  //
+  // Note: Jit64 (but not JitArm64) sometimes takes a shortcut when computing addresses and skips
+  // the cast to u32 that you see in the pseudocode above. When this happens, ppc_address_base
+  // is a 32-bit value stored in a 64-bit register (which effectively makes it behave like an
+  // unsigned 32-bit value), and ppc_address_offset is a signed 32-bit integer encoded directly
+  // into the load/store instruction. This can cause us to undershoot or overshoot the intended
+  // 4 GiB range by at most 2 GiB in either direction. So, make sure we have 2 GiB of guard pages
+  // on each side of each 4 GiB range.
+  //
+  // We need two 4 GiB ranges, one for PPC addresses with address translation disabled
+  // (m_physical_base) and one for PPC addresses with address translation enabled (m_logical_base),
+  // so our memory map ends up looking like this:
+  //
+  //   2 GiB guard
+  //   4 GiB view for disabled address translation
+  //   2 GiB guard
+  //   4 GiB view for enabled address translation
+  //   2 GiB guard
 
-  if (!m_physical_base)
+  constexpr size_t ppc_view_size = 0x1'0000'0000;
+  constexpr size_t guard_size = 0x8000'0000;
+  constexpr size_t memory_size = ppc_view_size * 2 + guard_size * 3;
+
+  u8* fastmem_arena = m_arena.ReserveMemoryRegion(memory_size);
+  if (!fastmem_arena)
   {
     PanicAlertFmt("Memory::InitFastmemArena(): Failed finding a memory base.");
     return false;
   }
 
+  m_physical_base = fastmem_arena + guard_size;
+  m_logical_base = fastmem_arena + ppc_view_size + guard_size * 2;
+
   for (const PhysicalMemoryRegion& region : m_physical_regions)
   {
     if (!region.active)
@@ -185,8 +221,6 @@ bool MemoryManager::InitFastmemArena()
     }
   }
 
-  m_logical_base = m_physical_base + 0x200000000;
-
   m_is_fastmem_arena_initialized = true;
   return true;
 }
diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h
index 41147b2049..70b8bb8d82 100644
--- a/Source/Core/Core/HW/Memmap.h
+++ b/Source/Core/Core/HW/Memmap.h
@@ -218,8 +218,8 @@ private:
   // with address translation turned on. This mapping is computed based
   // on the BAT registers.
   //
-  // Each of these 4GB regions is followed by 4GB of empty space so overflows
-  // in address computation in the JIT don't access the wrong memory.
+  // Each of these 4GB regions is surrounded by 2GB of empty space so overflows
+  // in address computation in the JIT don't access unrelated memory.
   //
   // The neighboring mirrors of RAM ([0x02000000, 0x08000000), etc.) exist because
   // the bus masks off the bits in question for RAM accesses; using them is a
@@ -227,8 +227,6 @@ private:
   // few buggy games (notably Rogue Squadron 2) use them by accident. They
   // aren't backed by memory mappings because they are used very rarely.
   //
-  // Dolphin doesn't emulate the difference between cached and uncached access.
-  //
   // TODO: The actual size of RAM is 24MB; the other 8MB shouldn't be backed by actual memory.
   // TODO: Do we want to handle the mirrors of the GC RAM?
   std::array<PhysicalMemoryRegion, 4> m_physical_regions{};
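
A few standalone C++ sketches follow to make the reasoning in the new comment
concrete. None of this is Dolphin code: every type and function name below
(FakeMemoryManager, TranslateForFastmem, and so on) is invented for
illustration. First, the fast path itself, i.e. the RMEM pseudocode from the
comment written out as real code:

  // How the fast path turns a PPC address into a host pointer, per the
  // pseudocode in the new comment. Hypothetical stand-ins, not Dolphin code.
  #include <cstdint>

  struct FakeMemoryManager
  {
    uint8_t* m_physical_base;  // start of the 4 GiB view for MSR.DR == 0
    uint8_t* m_logical_base;   // start of the 4 GiB view for MSR.DR == 1
  };

  uint8_t* TranslateForFastmem(const FakeMemoryManager& mm, bool msr_dr,
                               uint32_t ppc_address_base, int32_t ppc_address_offset)
  {
    // Pick a view based on whether address translation (MSR.DR) is enabled.
    uint8_t* rmem = msr_dr ? mm.m_logical_base : mm.m_physical_base;
    // The u32 cast is the wrap-around the comment describes; Jit64's shortcut
    // sometimes skips it, which is what the guard pages protect against.
    return rmem + static_cast<uint32_t>(ppc_address_base + ppc_address_offset);
  }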
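Next, the undershoot/overshoot bound that motivates the guards. Assuming the
Jit64 shortcut the comment describes (a zero-extended 32-bit base register plus
a signed 32-bit displacement encoded in the x86 instruction), the extreme
reachable offsets relative to the start of a 4 GiB view work out to exactly the
2 GiB in either direction that the comment claims:

  // Extreme values of base + displacement under the Jit64 shortcut: base is a
  // zero-extended u32, the displacement a signed s32.
  #include <cstdint>
  #include <cstdio>

  int main()
  {
    constexpr int64_t base_min = 0;           // smallest zero-extended u32
    constexpr int64_t base_max = UINT32_MAX;  // largest zero-extended u32
    constexpr int64_t disp_min = INT32_MIN;   // -2 GiB
    constexpr int64_t disp_max = INT32_MAX;   // +2 GiB - 1

    // Offsets relative to RMEM, the start of a 4 GiB view.
    constexpr int64_t lowest = base_min + disp_min;    // -2147483648: 2 GiB below the view
    constexpr int64_t highest = base_max + disp_max;   // 6442450942: just under 2 GiB past its end

    std::printf("reachable offsets: [%lld, %lld]\n",
                static_cast<long long>(lowest), static_cast<long long>(highest));
  }

Both extremes land inside the new guards, which is why guard_size is exactly
0x8000'0000 (2 GiB) rather than some smaller safety margin.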
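Finally, the layout arithmetic from the new InitFastmemArena(). The three
constants are copied from the patch; the offsets and static_asserts are my own
restatement of the guard/view/guard/view/guard diagram, written so a compiler
checks it:

  #include <cstddef>

  constexpr size_t ppc_view_size = 0x1'0000'0000;  // 4 GiB per view
  constexpr size_t guard_size = 0x8000'0000;       // 2 GiB per guard
  constexpr size_t memory_size = ppc_view_size * 2 + guard_size * 3;

  // Offsets of the two views inside the reservation, as computed in the patch:
  //   m_physical_base = fastmem_arena + guard_size
  //   m_logical_base  = fastmem_arena + ppc_view_size + guard_size * 2
  constexpr size_t physical_offset = guard_size;
  constexpr size_t logical_offset = ppc_view_size + guard_size * 2;

  // A guard precedes the physical view, a guard separates the two views, and
  // a guard follows the logical view to the end of the reservation.
  static_assert(physical_offset == guard_size);
  static_assert(physical_offset + ppc_view_size + guard_size == logical_offset);
  static_assert(logical_offset + ppc_view_size + guard_size == memory_size);

  int main() {}

The whole reservation is 12 GiB of address space, of which only the two 4 GiB
views are ever backed by real memory; the guards stay unmapped, so a stray
access faults and gets backpatched like any other slow-path access.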