diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp index e2dca3581c..3adebdc700 100644 --- a/Source/Core/Core/HW/Memmap.cpp +++ b/Source/Core/Core/HW/Memmap.cpp @@ -42,6 +42,7 @@ namespace Memory // Store the MemArena here u8* physical_base = nullptr; u8* logical_base = nullptr; +static bool is_fastmem_arena_initialized = false; // The MemArena class static Common::MemArena g_arena; @@ -131,7 +132,7 @@ struct LogicalMemoryView // other devices, like the GPU, use other rules, approximated by // Memory::GetPointer.) This memory is laid out as follows: // [0x00000000, 0x02000000) - 32MB RAM -// [0x02000000, 0x08000000) - Mirrors of 32MB RAM +// [0x02000000, 0x08000000) - Mirrors of 32MB RAM (not handled here) // [0x08000000, 0x0C000000) - EFB "mapping" (not handled here) // [0x0C000000, 0x0E000000) - MMIO etc. (not handled here) // [0x10000000, 0x14000000) - 64MB RAM (Wii-only; slightly slower) @@ -155,6 +156,7 @@ struct LogicalMemoryView // // TODO: The actual size of RAM is REALRAM_SIZE (24MB); the other 8MB shouldn't // be backed by actual memory. +// TODO: Do we want to handle the mirrors of the GC RAM? 
static PhysicalMemoryRegion physical_regions[] = { {&m_pRAM, 0x00000000, RAM_SIZE, PhysicalMemoryRegion::ALWAYS}, {&m_pL1Cache, 0xE0000000, L1_CACHE_SIZE, PhysicalMemoryRegion::ALWAYS}, @@ -164,7 +166,7 @@ static PhysicalMemoryRegion physical_regions[] = { static std::vector<LogicalMemoryView> logical_mapped_entries; -void Init() +static u32 GetFlags() { bool wii = SConfig::GetInstance().bWii; bool bMMU = SConfig::GetInstance().bMMU; @@ -181,6 +183,14 @@ void Init() flags |= PhysicalMemoryRegion::WII_ONLY; if (bFakeVMEM) flags |= PhysicalMemoryRegion::FAKE_VMEM; + + return flags; +} + +void Init() +{ + bool wii = SConfig::GetInstance().bWii; + u32 flags = GetFlags(); u32 mem_size = 0; for (PhysicalMemoryRegion& region : physical_regions) { @@ -190,15 +200,14 @@ void Init() mem_size += region.size; } g_arena.GrabSHMSegment(mem_size); - physical_base = Common::MemArena::FindMemoryBase(); + // Create an anonymous view of the physical memory for (PhysicalMemoryRegion& region : physical_regions) { if ((flags & region.flags) != region.flags) continue; - u8* base = physical_base + region.physical_address; - *region.out_pointer = (u8*)g_arena.CreateView(region.shm_position, region.size, base); + *region.out_pointer = (u8*)g_arena.CreateView(region.shm_position, region.size); if (!*region.out_pointer) { @@ -207,10 +216,6 @@ void Init() } } -#ifndef _ARCH_32 - logical_base = physical_base + 0x200000000; -#endif - if (wii) mmio_mapping = InitMMIOWii(); else @@ -222,8 +227,41 @@ void Init() m_IsInitialized = true; } +bool InitFastmemArena() +{ + u32 flags = GetFlags(); + physical_base = Common::MemArena::FindMemoryBase(); + + if (!physical_base) + return false; + + for (PhysicalMemoryRegion& region : physical_regions) + { + if ((flags & region.flags) != region.flags) + continue; + + u8* base = physical_base + region.physical_address; + u8* view = (u8*)g_arena.CreateView(region.shm_position, region.size, base); + + if (base != view) + { + return false; + } + } + +#ifndef _ARCH_32 + logical_base = 
physical_base + 0x200000000; +#endif + + is_fastmem_arena_initialized = true; + return true; +} + void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table) { + if (!is_fastmem_arena_initialized) + return; + for (auto& entry : logical_mapped_entries) { g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size); @@ -279,12 +317,10 @@ void DoState(PointerWrap& p) void Shutdown() { + ShutdownFastmemArena(); + m_IsInitialized = false; - u32 flags = 0; - if (SConfig::GetInstance().bWii) - flags |= PhysicalMemoryRegion::WII_ONLY; - if (m_pFakeVMEM) - flags |= PhysicalMemoryRegion::FAKE_VMEM; + u32 flags = GetFlags(); for (PhysicalMemoryRegion& region : physical_regions) { if ((flags & region.flags) != region.flags) @@ -292,16 +328,36 @@ void Shutdown() g_arena.ReleaseView(*region.out_pointer, region.size); *region.out_pointer = nullptr; } + g_arena.ReleaseSHMSegment(); + mmio_mapping.reset(); + INFO_LOG(MEMMAP, "Memory system shut down."); +} + +void ShutdownFastmemArena() +{ + if (!is_fastmem_arena_initialized) + return; + + u32 flags = GetFlags(); + for (PhysicalMemoryRegion& region : physical_regions) + { + if ((flags & region.flags) != region.flags) + continue; + + u8* base = physical_base + region.physical_address; + g_arena.ReleaseView(base, region.size); + } + for (auto& entry : logical_mapped_entries) { g_arena.ReleaseView(entry.mapped_pointer, entry.mapped_size); } logical_mapped_entries.clear(); - g_arena.ReleaseSHMSegment(); + physical_base = nullptr; logical_base = nullptr; - mmio_mapping.reset(); - INFO_LOG(MEMMAP, "Memory system shut down."); + + is_fastmem_arena_initialized = false; } void Clear() diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index 4cd5cfd5b3..ae98bbc7ef 100644 --- a/Source/Core/Core/HW/Memmap.h +++ b/Source/Core/Core/HW/Memmap.h @@ -62,6 +62,8 @@ extern std::unique_ptr<MMIO::Mapping> mmio_mapping; bool IsInitialized(); void Init(); void Shutdown(); +bool InitFastmemArena(); +void ShutdownFastmemArena(); void 
DoState(PointerWrap& p); void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index c1330446f4..6a4b2adc3f 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -333,6 +333,7 @@ void Jit64::Init() InitializeInstructionTables(); EnableBlockLink(); + jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena(); jo.optimizeGatherPipe = true; jo.accurateSinglePrecision = true; UpdateMemoryOptions(); @@ -393,6 +394,8 @@ void Jit64::Shutdown() FreeStack(); FreeCodeSpace(); + Memory::ShutdownFastmemArena(); + blocks.Shutdown(); m_far_code.Shutdown(); m_const_pool.Shutdown(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index a1cfe3976b..4fae084339 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -304,7 +304,9 @@ void Jit64::dcbz(UGeckoInstruction inst) AND(32, R(RSCRATCH), Imm32(~31)); } - if (MSR.DR) + bool emit_fast_path = MSR.DR && m_jit.jo.fastmem_arena; + + if (emit_fast_path) { // Perform lookup to see if we can use fast path. 
MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::dbat_table[0])); @@ -329,7 +331,7 @@ void Jit64::dcbz(UGeckoInstruction inst) ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH); ABI_PopRegistersAndAdjustStack(registersInUse, 0); - if (MSR.DR) + if (emit_fast_path) { FixupBranch end = J(true); SwitchToNearCode(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 23d6dbceae..73986763cb 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -371,7 +371,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, FixupBranch exit; const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || MSR.DR; - const bool fast_check_address = !slowmem && dr_set; + const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena; if (fast_check_address) { FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse); @@ -435,7 +435,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc BitSet32 registersInUse, bool signExtend) { // If the address is known to be RAM, just load it directly. 
- if (PowerPC::IsOptimizableRAMAddress(address)) + if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address)) { UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend); return; @@ -539,7 +539,7 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces FixupBranch exit; const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || MSR.DR; - const bool fast_check_address = !slowmem && dr_set; + const bool fast_check_address = !slowmem && dr_set && m_jit.jo.fastmem_arena; if (fast_check_address) { FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse); @@ -641,7 +641,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, m_jit.js.fifoBytesSinceCheck += accessSize >> 3; return false; } - else if (PowerPC::IsOptimizableRAMAddress(address)) + else if (m_jit.jo.fastmem_arena && PowerPC::IsOptimizableRAMAddress(address)) { WriteToConstRamAddress(accessSize, arg, address); return false; diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 70224162f0..569c28edb5 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -489,6 +489,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type, int size = sizes[type] * (single ? 1 : 2); bool isInline = quantize != -1; + bool safe_access = m_jit.jo.memcheck || !m_jit.jo.fastmem; // illegal if (type == QUANTIZE_INVALID1 || type == QUANTIZE_INVALID2 || type == QUANTIZE_INVALID3) @@ -506,7 +507,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type, bool extend = single && (type == QUANTIZE_S8 || type == QUANTIZE_S16); - if (m_jit.jo.memcheck) + if (safe_access) { BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE_LOAD; int flags = isInline ? 
0 : @@ -632,8 +633,9 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) { int size = single ? 32 : 64; bool extend = false; + bool safe_access = m_jit.jo.memcheck || !m_jit.jo.fastmem; - if (m_jit.jo.memcheck) + if (safe_access) { BitSet32 regsToSave = QUANTIZED_REGS_TO_SAVE; int flags = isInline ? 0 : @@ -644,7 +646,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) if (single) { - if (m_jit.jo.memcheck) + if (safe_access) { MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); } @@ -669,7 +671,7 @@ void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) // for a good reason, or merely because no game does this. // If we find something that actually does do this, maybe this should be changed. How // much of a performance hit would it be? - if (m_jit.jo.memcheck) + if (safe_access) { ROL(64, R(RSCRATCH_EXTRA), Imm8(32)); MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA)); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index a60035d47a..a0b6fe9aae 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -49,6 +49,8 @@ void JitArm64::Init() size_t child_code_size = SConfig::GetInstance().bMMU ? 
FARCODE_SIZE_MMU : FARCODE_SIZE; AllocCodeSpace(CODE_SIZE + child_code_size); AddChildCodeSpace(&farcode, child_code_size); + + jo.fastmem_arena = SConfig::GetInstance().bFastmem && Memory::InitFastmemArena(); jo.enableBlocklink = true; jo.optimizeGatherPipe = true; UpdateMemoryOptions(); @@ -133,6 +135,7 @@ void JitArm64::ClearCache() void JitArm64::Shutdown() { + Memory::ShutdownFastmemArena(); FreeCodeSpace(); blocks.Shutdown(); FreeStack(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index b5ef8c2dc0..eeabe7737d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -45,6 +45,6 @@ bool JitBase::CanMergeNextInstructions(int count) const void JitBase::UpdateMemoryOptions() { bool any_watchpoints = PowerPC::memchecks.HasAny(); - jo.fastmem = SConfig::GetInstance().bFastmem && (MSR.DR || !any_watchpoints); + jo.fastmem = SConfig::GetInstance().bFastmem && jo.fastmem_arena && (MSR.DR || !any_watchpoints); jo.memcheck = SConfig::GetInstance().bMMU || any_watchpoints; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 6597384813..0d45293883 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -48,6 +48,7 @@ protected: bool optimizeGatherPipe; bool accurateSinglePrecision; bool fastmem; + bool fastmem_arena; bool memcheck; bool profile_blocks; }; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 3d99386533..952556ddea 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1117,13 +1117,12 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe for (int i = 0; i < 8; i++, pteg_addr += 8) { - u32 pteg; - std::memcpy(&pteg, &Memory::physical_base[pteg_addr], sizeof(u32)); + u32 pteg = Common::swap32(Memory::Read_U32(pteg_addr)); if 
(pte1 == pteg) { UPTE2 PTE2; - PTE2.Hex = Common::swap32(&Memory::physical_base[pteg_addr + 4]); + PTE2.Hex = Memory::Read_U32(pteg_addr + 4); // set the access bits switch (flag) @@ -1145,8 +1144,7 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe if (!IsNoExceptionFlag(flag)) { - const u32 swapped_pte2 = Common::swap32(PTE2.Hex); - std::memcpy(&Memory::physical_base[pteg_addr + 4], &swapped_pte2, sizeof(u32)); + Memory::Write_U32(PTE2.Hex, pteg_addr + 4); } // We already updated the TLB entry if this was caused by a C bit.