diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
index af2ac6eb30..62be247227 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@@ -323,14 +323,14 @@ void Interpreter::lwzu(UGeckoInstruction inst)
 
 void Interpreter::stb(UGeckoInstruction inst)
 {
-  PowerPC::Write_U8((u8)rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst));
+  PowerPC::Write_U8(rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst));
 }
 
 void Interpreter::stbu(UGeckoInstruction inst)
 {
   const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst);
 
-  PowerPC::Write_U8((u8)rGPR[inst.RS], address);
+  PowerPC::Write_U8(rGPR[inst.RS], address);
   if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
   {
     rGPR[inst.RA] = address;
@@ -399,14 +399,14 @@ void Interpreter::stfsu(UGeckoInstruction inst)
 
 void Interpreter::sth(UGeckoInstruction inst)
 {
-  PowerPC::Write_U16((u16)rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst));
+  PowerPC::Write_U16(rGPR[inst.RS], Helper_Get_EA(PowerPC::ppcState, inst));
 }
 
 void Interpreter::sthu(UGeckoInstruction inst)
 {
   const u32 address = Helper_Get_EA_U(PowerPC::ppcState, inst);
 
-  PowerPC::Write_U16((u16)rGPR[inst.RS], address);
+  PowerPC::Write_U16(rGPR[inst.RS], address);
   if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
   {
     rGPR[inst.RA] = address;
@@ -731,7 +731,7 @@ void Interpreter::stbux(UGeckoInstruction inst)
 {
   const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst);
 
-  PowerPC::Write_U8((u8)rGPR[inst.RS], address);
+  PowerPC::Write_U8(rGPR[inst.RS], address);
   if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
   {
     rGPR[inst.RA] = address;
@@ -740,7 +740,7 @@ void Interpreter::stbux(UGeckoInstruction inst)
 
 void Interpreter::stbx(UGeckoInstruction inst)
 {
-  PowerPC::Write_U8((u8)rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst));
+  PowerPC::Write_U8(rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst));
 }
 
 void Interpreter::stfdux(UGeckoInstruction inst)
@@ -819,14 +819,14 @@ void Interpreter::stfsx(UGeckoInstruction inst)
 
 void Interpreter::sthbrx(UGeckoInstruction inst)
 {
-  PowerPC::Write_U16(Common::swap16((u16)rGPR[inst.RS]), Helper_Get_EA_X(PowerPC::ppcState, inst));
+  PowerPC::Write_U16_Swap(rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst));
 }
 
 void Interpreter::sthux(UGeckoInstruction inst)
 {
   const u32 address = Helper_Get_EA_UX(PowerPC::ppcState, inst);
 
-  PowerPC::Write_U16((u16)rGPR[inst.RS], address);
+  PowerPC::Write_U16(rGPR[inst.RS], address);
   if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
   {
     rGPR[inst.RA] = address;
@@ -835,7 +835,7 @@ void Interpreter::sthux(UGeckoInstruction inst)
 
 void Interpreter::sthx(UGeckoInstruction inst)
 {
-  PowerPC::Write_U16((u16)rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst));
+  PowerPC::Write_U16(rGPR[inst.RS], Helper_Get_EA_X(PowerPC::ppcState, inst));
 }
 
 // lswi - bizarro string instruction
@@ -968,7 +968,7 @@ void Interpreter::stwbrx(UGeckoInstruction inst)
 {
   const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst);
 
-  PowerPC::Write_U32(Common::swap32(rGPR[inst.RS]), address);
+  PowerPC::Write_U32_Swap(rGPR[inst.RS], address);
 }
 
 // The following two instructions are for SMP communications. On a single
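Note on the interpreter hunks above: the store helpers now take the full 32-bit GPR value (truncation to the store width happens inside the MMU), and the byte-reversed stores sthbrx/stwbrx call the new Write_U16_Swap/Write_U32_Swap helpers instead of swapping in the interpreter. A minimal standalone sketch, assuming a stand-in swap16 for Common::swap16, showing that the old and new sthbrx forms hand the same low half to the 16-bit store:

    #include <cassert>
    #include <cstdint>

    // Stand-in for Common::swap16.
    static uint16_t swap16(uint16_t v) { return static_cast<uint16_t>((v >> 8) | (v << 8)); }

    int main()
    {
      const uint32_t rs = 0x11223344;                              // full GPR value
      const uint16_t old_arg = swap16(static_cast<uint16_t>(rs));  // old: truncate, then swap
      const uint32_t new_arg = (rs & 0xFFFF0000) | swap16(static_cast<uint16_t>(rs));  // new Write_U16_Swap
      assert(static_cast<uint16_t>(new_arg) == old_arg);           // Write_U16 only stores the low half
      return 0;
    }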
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index b45bbfc35a..51fa36b7b0 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -197,18 +197,16 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
   }
   else if (flags & BackPatchInfo::FLAG_STORE)
   {
-    ARM64Reg temp = ARM64Reg::W0;
-    temp = ByteswapBeforeStore(this, temp, RS, flags, false);
-    if (temp != ARM64Reg::W0)
-      MOV(ARM64Reg::W0, temp);
+    const bool reverse = (flags & BackPatchInfo::FLAG_REVERSE) != 0;
 
     if (flags & BackPatchInfo::FLAG_SIZE_32)
-      MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
+      MOVP2R(ARM64Reg::X8, reverse ? &PowerPC::Write_U32_Swap : &PowerPC::Write_U32);
     else if (flags & BackPatchInfo::FLAG_SIZE_16)
-      MOVP2R(ARM64Reg::X8, &PowerPC::Write_U16);
+      MOVP2R(ARM64Reg::X8, reverse ? &PowerPC::Write_U16_Swap : &PowerPC::Write_U16);
     else
       MOVP2R(ARM64Reg::X8, &PowerPC::Write_U8);
 
+    MOV(ARM64Reg::W0, RS);
     BLR(ARM64Reg::X8);
   }
   else if (flags & BackPatchInfo::FLAG_ZERO_256)
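With every store handler now taking a u32 value, the AArch64 backpatch slow path no longer byteswaps into a temporary register; it picks a swapping or non-swapping MMU helper based on BackPatchInfo::FLAG_REVERSE and moves RS into W0 before the call. A rough C++ sketch of the selection the emitted code performs (SelectStoreHandler is a hypothetical name, and the Dolphin headers declaring BackPatchInfo and the PowerPC::Write_* functions are assumed to be in scope; the real code materializes the pointer with MOVP2R and calls it with BLR):

    using StoreFn = void (*)(u32 var, u32 address);

    static StoreFn SelectStoreHandler(u32 flags)
    {
      const bool reverse = (flags & BackPatchInfo::FLAG_REVERSE) != 0;
      if (flags & BackPatchInfo::FLAG_SIZE_32)
        return reverse ? &PowerPC::Write_U32_Swap : &PowerPC::Write_U32;
      if (flags & BackPatchInfo::FLAG_SIZE_16)
        return reverse ? &PowerPC::Write_U16_Swap : &PowerPC::Write_U16;
      return &PowerPC::Write_U8;
    }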
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp
index 3b49381d76..3c70a5319c 100644
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp
@@ -8,14 +8,17 @@
 #include <cstring>
 #include <string>
 
+#include "Common/Assert.h"
 #include "Common/BitUtils.h"
 #include "Common/CommonTypes.h"
+#include "Common/Logging/Log.h"
 
 #include "Core/ConfigManager.h"
 #include "Core/HW/CPU.h"
 #include "Core/HW/GPFifo.h"
 #include "Core/HW/MMIO.h"
 #include "Core/HW/Memmap.h"
+#include "Core/HW/ProcessorInterface.h"
 #include "Core/PowerPC/JitInterface.h"
 #include "Core/PowerPC/PowerPC.h"
 
@@ -106,6 +109,7 @@ struct TranslateAddressResult
     PAGE_FAULT
   } result;
   u32 address;
+  bool wi;  // Set to true if the view of memory is either write-through or cache-inhibited
   bool Success() const { return result <= PAGE_TABLE_TRANSLATED; }
 };
 template <const XCheckTLBFlag flag>
@@ -255,9 +259,28 @@ static T ReadFromHardware(u32 em_address)
   return 0;
 }
 
-template <XCheckTLBFlag flag, typename T, bool never_translate = false>
-static void WriteToHardware(u32 em_address, const T data)
+template <XCheckTLBFlag flag, bool never_translate = false>
+static void WriteToHardware(u32 em_address, const u32 data, const u32 size)
 {
+  DEBUG_ASSERT(size <= 4);
+
+  const u32 em_address_start_page = em_address & ~(HW_PAGE_SIZE - 1);
+  const u32 em_address_end_page = (em_address + size - 1) & ~(HW_PAGE_SIZE - 1);
+  if (em_address_start_page != em_address_end_page)
+  {
+    // The write crosses a page boundary. Break it up into two writes.
+    // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
+    // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
+    const u32 first_half_size = em_address_end_page - em_address;
+    const u32 second_half_size = size - first_half_size;
+    WriteToHardware<flag, never_translate>(
+        em_address, Common::RotateRight(data, second_half_size * 8), first_half_size);
+    WriteToHardware<flag, never_translate>(em_address_end_page, data, second_half_size);
+    return;
+  }
+
+  bool wi = false;
+
   if (!never_translate && MSR.DR)
   {
     auto translated_addr = TranslateAddress<flag>(em_address);
@@ -267,72 +290,8 @@ static void WriteToHardware(u32 em_address, const T data)
       GenerateDSIException(em_address, true);
       return;
     }
-    if ((em_address & (sizeof(T) - 1)) &&
-        (em_address & (HW_PAGE_SIZE - 1)) > HW_PAGE_SIZE - sizeof(T))
-    {
-      // This could be unaligned down to the byte level... hopefully this is rare, so doing it this
-      // way isn't too terrible.
-      // TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
-      // Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
-      u32 em_address_next_page = (em_address + sizeof(T) - 1) & ~(HW_PAGE_SIZE - 1);
-      auto addr_next_page = TranslateAddress<flag>(em_address_next_page);
-      if (!addr_next_page.Success())
-      {
-        if (flag == XCheckTLBFlag::Write)
-          GenerateDSIException(em_address_next_page, true);
-        return;
-      }
-      T val = bswap(data);
-      u32 addr_translated = translated_addr.address;
-      for (size_t i = 0; i < sizeof(T); i++, addr_translated++)
-      {
-        if (em_address + i == em_address_next_page)
-          addr_translated = addr_next_page.address;
-        WriteToHardware<flag, u8, true>(addr_translated, static_cast<u8>(val >> (i * 8)));
-      }
-      return;
-    }
     em_address = translated_addr.address;
-  }
-
-  // TODO: Make sure these are safe for unaligned addresses.
-
-  if (Memory::m_pRAM && (em_address & 0xF8000000) == 0x00000000)
-  {
-    // Handle RAM; the masking intentionally discards bits (essentially creating
-    // mirrors of memory).
-    // TODO: Only the first GetRamSizeReal() is supposed to be backed by actual memory.
-    const T swapped_data = bswap(data);
-    std::memcpy(&Memory::m_pRAM[em_address & Memory::GetRamMask()], &swapped_data, sizeof(T));
-    return;
-  }
-
-  if (Memory::m_pEXRAM && (em_address >> 28) == 0x1 &&
-      (em_address & 0x0FFFFFFF) < Memory::GetExRamSizeReal())
-  {
-    const T swapped_data = bswap(data);
-    std::memcpy(&Memory::m_pEXRAM[em_address & 0x0FFFFFFF], &swapped_data, sizeof(T));
-    return;
-  }
-
-  // Locked L1 technically doesn't have a fixed address, but games all use 0xE0000000.
-  if (Memory::m_pL1Cache && (em_address >> 28 == 0xE) &&
-      (em_address < (0xE0000000 + Memory::GetL1CacheSize())))
-  {
-    const T swapped_data = bswap(data);
-    std::memcpy(&Memory::m_pL1Cache[em_address & 0x0FFFFFFF], &swapped_data, sizeof(T));
-    return;
-  }
-
-  // In Fake-VMEM mode, we need to map the memory somewhere into
-  // physical memory for BAT translation to work; we currently use
-  // [0x7E000000, 0x80000000).
-  if (Memory::m_pFakeVMEM && ((em_address & 0xFE000000) == 0x7E000000))
-  {
-    const T swapped_data = bswap(data);
-    std::memcpy(&Memory::m_pFakeVMEM[em_address & Memory::GetFakeVMemMask()], &swapped_data,
-                sizeof(T));
-    return;
+    wi = translated_addr.wi;
   }
 
   // Check for a gather pipe write.
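In the rewritten WriteToHardware above, the value to be stored always sits in the low `size` bytes of the u32 `data`, and Common::swap32(Common::RotateRight(data, size * 8)) later lays those bytes out in guest (big-endian) order. The page-boundary split relies on the same convention: rotating right by second_half_size * 8 moves the first half's bytes into the low part of the word for the first recursive call. A small self-contained check of that arithmetic (rotr and byte_of are stand-ins we made up for Common::RotateRight and the byte layout the memcpy path produces):

    #include <cassert>
    #include <cstdint>

    // Stand-in for Common::RotateRight.
    static std::uint32_t rotr(std::uint32_t v, unsigned n)
    {
      n &= 31;
      return n == 0 ? v : (v >> n) | (v << (32 - n));
    }

    // Big-endian byte i (0 = lowest address) of a size-byte store whose value
    // occupies the low bytes of data; models swap32(rotr(data, size * 8)) + memcpy.
    static std::uint8_t byte_of(std::uint32_t data, unsigned size, unsigned i)
    {
      return static_cast<std::uint8_t>(data >> (8 * (size - 1 - i)));
    }

    int main()
    {
      const std::uint32_t data = 0xAABBCCDD;  // 4-byte store straddling a page end by 2 bytes
      const unsigned first_half = 2, second_half = 2;

      // First call: WriteToHardware(addr, RotateRight(data, second_half * 8), first_half)
      const std::uint32_t first_data = rotr(data, second_half * 8);
      assert(byte_of(first_data, first_half, 0) == 0xAA && byte_of(first_data, first_half, 1) == 0xBB);

      // Second call: WriteToHardware(next_page, data, second_half)
      assert(byte_of(data, second_half, 0) == 0xCC && byte_of(data, second_half, 1) == 0xDD);
      return 0;
    }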
@@ -340,19 +299,24 @@
   // Pac-Man World 3 in particular is affected by this.
   if (flag == XCheckTLBFlag::Write && (em_address & 0xFFFFF000) == 0x0C008000)
   {
-    switch (sizeof(T))
+    switch (size)
     {
     case 1:
-      GPFifo::Write8((u8)data);
+      GPFifo::Write8(static_cast<u8>(data));
       return;
     case 2:
-      GPFifo::Write16((u16)data);
+      GPFifo::Write16(static_cast<u16>(data));
       return;
     case 4:
-      GPFifo::Write32((u32)data);
+      GPFifo::Write32(data);
       return;
-    case 8:
-      GPFifo::Write64((u64)data);
+    default:
+      // Some kind of misaligned write. TODO: Does this match how the actual hardware handles it?
+      for (size_t i = size * 8; i > 0;)
+      {
+        i -= 8;
+        GPFifo::Write8(static_cast<u8>(data >> i));
+      }
       return;
     }
   }
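The misaligned gather-pipe fallback above pushes the bytes most-significant first, so a hypothetical 3-byte write of 0x00AABBCC comes out as Write8(0xAA), Write8(0xBB), Write8(0xCC), matching the big-endian order of the 16-bit and 32-bit cases. A tiny standalone illustration of the loop's byte order (the values are made up):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
      const std::uint32_t data = 0x00AABBCC;  // hypothetical 3-byte gather-pipe write
      const std::uint32_t size = 3;
      for (std::size_t i = size * 8; i > 0;)
      {
        i -= 8;
        std::printf("%02X ", static_cast<unsigned>(static_cast<std::uint8_t>(data >> i)));  // AA BB CC
      }
      return 0;
    }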
@@ -361,16 +325,90 @@
   {
     if (em_address < 0x0c000000)
     {
-      EFB_Write((u32)data, em_address);
+      EFB_Write(data, em_address);
       return;
     }
-    else
+
+    switch (size)
     {
-      Memory::mmio_mapping->Write(em_address, data);
+    case 1:
+      Memory::mmio_mapping->Write(em_address, static_cast<u8>(data));
+      return;
+    case 2:
+      Memory::mmio_mapping->Write(em_address, static_cast<u16>(data));
+      return;
+    case 4:
+      Memory::mmio_mapping->Write(em_address, data);
+      return;
+    default:
+      // Some kind of misaligned write. TODO: Does this match how the actual hardware handles it?
+      for (size_t i = size * 8; i > 0; em_address++)
+      {
+        i -= 8;
+        Memory::mmio_mapping->Write(em_address, static_cast<u8>(data >> i));
+      }
       return;
     }
   }
 
+  const u32 swapped_data = Common::swap32(Common::RotateRight(data, size * 8));
+
+  // Locked L1 technically doesn't have a fixed address, but games all use 0xE0000000.
+  if (Memory::m_pL1Cache && (em_address >> 28 == 0xE) &&
+      (em_address < (0xE0000000 + Memory::GetL1CacheSize())))
+  {
+    std::memcpy(&Memory::m_pL1Cache[em_address & 0x0FFFFFFF], &swapped_data, size);
+    return;
+  }
+
+  if (wi && (size < 4 || (em_address & 0x3)))
+  {
+    // When a write to memory is performed in hardware, 64 bits of data are sent to the memory
+    // controller along with a mask. This mask is encoded using just two bits of data - one for
+    // the upper 32 bits and one for the lower 32 bits - which leads to some odd data duplication
+    // behavior for write-through/cache-inhibited writes with a start address or end address that
+    // isn't 32-bit aligned. See https://bugs.dolphin-emu.org/issues/12565 for details.
+
+    // TODO: This interrupt is supposed to have associated cause and address registers
+    // TODO: This should trigger the hwtest's interrupt handling, but it does not seem to
+    // (https://github.com/dolphin-emu/hwtests/pull/42)
+    ProcessorInterface::SetInterrupt(ProcessorInterface::INT_CAUSE_PI);
+
+    const u32 rotated_data = Common::RotateRight(data, ((em_address & 0x3) + size) * 8);
+
+    for (u32 addr = em_address & ~0x7; addr < em_address + size; addr += 8)
+    {
+      WriteToHardware<flag, true>(addr, rotated_data, 4);
+      WriteToHardware<flag, true>(addr + 4, rotated_data, 4);
+    }
+
+    return;
+  }
+
+  if (Memory::m_pRAM && (em_address & 0xF8000000) == 0x00000000)
+  {
+    // Handle RAM; the masking intentionally discards bits (essentially creating
+    // mirrors of memory).
+    std::memcpy(&Memory::m_pRAM[em_address & Memory::GetRamMask()], &swapped_data, size);
+    return;
+  }
+
+  if (Memory::m_pEXRAM && (em_address >> 28) == 0x1 &&
+      (em_address & 0x0FFFFFFF) < Memory::GetExRamSizeReal())
+  {
+    std::memcpy(&Memory::m_pEXRAM[em_address & 0x0FFFFFFF], &swapped_data, size);
+    return;
+  }
+
+  // In Fake-VMEM mode, we need to map the memory somewhere into
+  // physical memory for BAT translation to work; we currently use
+  // [0x7E000000, 0x80000000).
+  if (Memory::m_pFakeVMEM && ((em_address & 0xFE000000) == 0x7E000000))
+  {
+    std::memcpy(&Memory::m_pFakeVMEM[em_address & Memory::GetFakeVMemMask()], &swapped_data, size);
+    return;
+  }
+
   PanicAlertFmt("Unable to resolve write address {:x} PC {:x}", em_address, PC);
 }
 // =====================
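For write-through or cache-inhibited mappings, a store that is narrower than 32 bits or not 32-bit aligned is widened to whole 64-bit blocks by the branch above, which is what produces the data duplication described in the comment (and in https://bugs.dolphin-emu.org/issues/12565). As a worked example under that code: a cache-inhibited 1-byte store of 0xAB to an address with (address & 7) == 2 rotates the data right by (2 + 1) * 8 bits and then writes the same 32-bit value to both halves of the surrounding 8-byte block, so 0xAB ends up at block offsets 2 and 6. A minimal sketch of just that arithmetic (rotr stands in for Common::RotateRight):

    #include <cassert>
    #include <cstdint>

    // Stand-in for Common::RotateRight.
    static std::uint32_t rotr(std::uint32_t v, unsigned n)
    {
      n &= 31;
      return n == 0 ? v : (v >> n) | (v << (32 - n));
    }

    int main()
    {
      const std::uint32_t em_address = 0x0000000A;  // (em_address & 0x7) == 2; assume a wi mapping
      const std::uint32_t data = 0xAB;
      const std::uint32_t size = 1;

      const std::uint32_t rotated = rotr(data, ((em_address & 0x3) + size) * 8);
      assert(rotated == 0x0000AB00);

      // Both 32-bit halves of the enclosing 8-byte block receive the same value, so the
      // byte 0xAB appears at block offsets 2 and 6 (big-endian byte numbering).
      const std::uint8_t byte_at_offset_2 = static_cast<std::uint8_t>(rotated >> (8 * (3 - 2)));
      assert(byte_at_offset_2 == 0xAB);
      return 0;
    }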
@@ -607,42 +645,40 @@ u32 Read_U16_ZX(const u32 address)
   return Read_U16(address);
 }
 
-void Write_U8(const u8 var, const u32 address)
+void Write_U8(const u32 var, const u32 address)
 {
   Memcheck(address, var, true, 1);
-  WriteToHardware<XCheckTLBFlag::Write>(address, var);
+  WriteToHardware<XCheckTLBFlag::Write>(address, var, 1);
 }
 
-void Write_U16(const u16 var, const u32 address)
+void Write_U16(const u32 var, const u32 address)
 {
   Memcheck(address, var, true, 2);
-  WriteToHardware<XCheckTLBFlag::Write>(address, var);
+  WriteToHardware<XCheckTLBFlag::Write>(address, var, 2);
 }
 
-void Write_U16_Swap(const u16 var, const u32 address)
+void Write_U16_Swap(const u32 var, const u32 address)
 {
-  Memcheck(address, var, true, 2);
-  Write_U16(Common::swap16(var), address);
+  Write_U16((var & 0xFFFF0000) | Common::swap16(static_cast<u16>(var)), address);
 }
 
 void Write_U32(const u32 var, const u32 address)
 {
   Memcheck(address, var, true, 4);
-  WriteToHardware<XCheckTLBFlag::Write>(address, var);
+  WriteToHardware<XCheckTLBFlag::Write>(address, var, 4);
 }
 
 void Write_U32_Swap(const u32 var, const u32 address)
 {
-  Memcheck(address, var, true, 4);
   Write_U32(Common::swap32(var), address);
 }
 
 void Write_U64(const u64 var, const u32 address)
 {
   Memcheck(address, (u32)var, true, 8);
-  WriteToHardware<XCheckTLBFlag::Write>(address, var);
+  WriteToHardware<XCheckTLBFlag::Write>(address, static_cast<u32>(var >> 32), 4);
+  WriteToHardware<XCheckTLBFlag::Write>(address + sizeof(u32), static_cast<u32>(var), 4);
 }
 
 void Write_U64_Swap(const u64 var, const u32 address)
 {
-  Memcheck(address, (u32)var, true, 8);
   Write_U64(Common::swap64(var), address);
 }
 
@@ -687,24 +723,25 @@ double HostRead_F64(const u32 address)
   return Common::BitCast<double>(integral);
 }
 
-void HostWrite_U8(const u8 var, const u32 address)
+void HostWrite_U8(const u32 var, const u32 address)
 {
-  WriteToHardware<XCheckTLBFlag::NoException>(address, var);
+  WriteToHardware<XCheckTLBFlag::NoException>(address, var, 1);
 }
 
-void HostWrite_U16(const u16 var, const u32 address)
+void HostWrite_U16(const u32 var, const u32 address)
 {
-  WriteToHardware<XCheckTLBFlag::NoException>(address, var);
+  WriteToHardware<XCheckTLBFlag::NoException>(address, var, 2);
 }
 
 void HostWrite_U32(const u32 var, const u32 address)
 {
-  WriteToHardware<XCheckTLBFlag::NoException>(address, var);
+  WriteToHardware<XCheckTLBFlag::NoException>(address, var, 4);
 }
 
 void HostWrite_U64(const u64 var, const u32 address)
 {
-  WriteToHardware<XCheckTLBFlag::NoException>(address, var);
+  WriteToHardware<XCheckTLBFlag::NoException>(address, static_cast<u32>(var >> 32), 4);
+  WriteToHardware<XCheckTLBFlag::NoException>(address + sizeof(u32), static_cast<u32>(var), 4);
 }
 
 void HostWrite_F32(const float var, const u32 address)
@@ -721,8 +758,8 @@ void HostWrite_F64(const double var, const u32 address)
   HostWrite_U64(integral, address);
 }
 
-template <typename T>
-static TryWriteResult HostTryWriteUX(const T var, const u32 address, RequestedAddressSpace space)
+static TryWriteResult HostTryWriteUX(const u32 var, const u32 address, const u32 size,
+                                     RequestedAddressSpace space)
 {
   if (!HostIsRAMAddress(address, space))
     return TryWriteResult();
@@ -730,15 +767,15 @@ static TryWriteResult HostTryWriteUX(const T var, const u32 address, RequestedAd
 
   switch (space)
   {
   case RequestedAddressSpace::Effective:
-    WriteToHardware<XCheckTLBFlag::NoException>(address, var);
+    WriteToHardware<XCheckTLBFlag::NoException>(address, var, size);
    return TryWriteResult(!!MSR.DR);
   case RequestedAddressSpace::Physical:
-    WriteToHardware<XCheckTLBFlag::NoException, T, true>(address, var);
+    WriteToHardware<XCheckTLBFlag::NoException, true>(address, var, size);
     return TryWriteResult(false);
   case RequestedAddressSpace::Virtual:
     if (!MSR.DR)
       return TryWriteResult();
-    WriteToHardware<XCheckTLBFlag::NoException>(address, var);
+    WriteToHardware<XCheckTLBFlag::NoException>(address, var, size);
     return TryWriteResult(true);
   }
 
@@ -746,24 +783,28 @@ static TryWriteResult HostTryWriteUX(const T var, const u32 address, RequestedAd
   return TryWriteResult();
 }
 
-TryWriteResult HostTryWriteU8(const u8 var, const u32 address, RequestedAddressSpace space)
+TryWriteResult HostTryWriteU8(const u32 var, const u32 address, RequestedAddressSpace space)
 {
-  return HostTryWriteUX(var, address, space);
+  return HostTryWriteUX(var, address, 1, space);
 }
 
-TryWriteResult HostTryWriteU16(const u16 var, const u32 address, RequestedAddressSpace space)
+TryWriteResult HostTryWriteU16(const u32 var, const u32 address, RequestedAddressSpace space)
 {
-  return HostTryWriteUX(var, address, space);
+  return HostTryWriteUX(var, address, 2, space);
 }
 
 TryWriteResult HostTryWriteU32(const u32 var, const u32 address, RequestedAddressSpace space)
 {
-  return HostTryWriteUX(var, address, space);
+  return HostTryWriteUX(var, address, 4, space);
 }
 
 TryWriteResult HostTryWriteU64(const u64 var, const u32 address, RequestedAddressSpace space)
 {
-  return HostTryWriteUX(var, address, space);
+  const TryWriteResult result = HostTryWriteUX(static_cast<u32>(var >> 32), address, 4, space);
+  if (!result)
+    return result;
+
+  return HostTryWriteUX(static_cast<u32>(var), address + 4, 4, space);
 }
 
 TryWriteResult HostTryWriteF32(const float var, const u32 address, RequestedAddressSpace space)
@@ -1000,8 +1041,8 @@ void ClearCacheLine(u32 address)
 
   // TODO: This isn't precisely correct for non-RAM regions, but the difference
   // is unlikely to matter.
-  for (u32 i = 0; i < 32; i += 8)
-    WriteToHardware<XCheckTLBFlag::Write, u64, true>(address + i, 0);
+  for (u32 i = 0; i < 32; i += 4)
+    WriteToHardware<XCheckTLBFlag::Write, true>(address + i, 0, 4);
 }
 
 u32 IsOptimizableMMIOAccess(u32 address, u32 access_size)
@@ -1015,7 +1056,8 @@ u32 IsOptimizableMMIOAccess(u32 address, u32 access_size)
   // Translate address
   // If we also optimize for TLB mappings, we'd have to clear the
   // JitCache on each TLB invalidation.
-  if (!TranslateBatAddess(dbat_table, &address))
+  bool wi = false;
+  if (!TranslateBatAddess(dbat_table, &address, &wi))
     return 0;
 
   // Check whether the address is an aligned address of an MMIO register.
@@ -1037,7 +1079,8 @@ bool IsOptimizableGatherPipeWrite(u32 address)
   // Translate address, only check BAT mapping.
   // If we also optimize for TLB mappings, we'd have to clear the
   // JitCache on each TLB invalidation.
-  if (!TranslateBatAddess(dbat_table, &address))
+  bool wi = false;
+  if (!TranslateBatAddess(dbat_table, &address, &wi))
     return false;
 
   // Check whether the translated address equals the address in WPAR.
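The wi flag that the BAT and TLB lookups below report is derived from the W (write-through) and I (cache-inhibited) bits of the page's WIMG attribute; M (memory coherence) and G (guarded) do not matter for this path, which is why the checks mask with 0b1100. A small illustrative assertion, assuming the usual W-I-M-G ordering of the 4-bit field (the constant names are ours, not the patch's):

    #include <cstdint>

    constexpr std::uint32_t WIMG_W = 0b1000;  // write-through
    constexpr std::uint32_t WIMG_I = 0b0100;  // cache-inhibited
    constexpr std::uint32_t WIMG_M = 0b0010;  // memory coherence (not relevant here)
    constexpr std::uint32_t WIMG_G = 0b0001;  // guarded (not relevant here)
    static_assert((WIMG_W | WIMG_I) == 0b1100, "mask used by the new wi checks");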
@@ -1206,18 +1249,20 @@ enum class TLBLookupResult
   UpdateC
 };
 
-static TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32 vpa, u32* paddr)
+static TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32 vpa, u32* paddr,
+                                            bool* wi)
 {
   const u32 tag = vpa >> HW_PAGE_INDEX_SHIFT;
   TLBEntry& tlbe = ppcState.tlb[IsOpcodeFlag(flag)][tag & HW_PAGE_INDEX_MASK];
 
   if (tlbe.tag[0] == tag)
   {
+    UPTE2 PTE2;
+    PTE2.Hex = tlbe.pte[0];
+
     // Check if C bit requires updating
     if (flag == XCheckTLBFlag::Write)
     {
-      UPTE2 PTE2;
-      PTE2.Hex = tlbe.pte[0];
       if (PTE2.C == 0)
       {
         PTE2.C = 1;
@@ -1230,16 +1275,18 @@ static TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32
 
     tlbe.recent = 0;
     *paddr = tlbe.paddr[0] | (vpa & 0xfff);
+    *wi = (PTE2.WIMG & 0b1100) != 0;
 
     return TLBLookupResult::Found;
   }
   if (tlbe.tag[1] == tag)
   {
+    UPTE2 PTE2;
+    PTE2.Hex = tlbe.pte[0];
+
     // Check if C bit requires updating
     if (flag == XCheckTLBFlag::Write)
     {
-      UPTE2 PTE2;
-      PTE2.Hex = tlbe.pte[1];
       if (PTE2.C == 0)
       {
         PTE2.C = 1;
@@ -1252,6 +1299,7 @@ static TLBLookupResult LookupTLBPageAddress(const XCheckTLBFlag flag, const u32
 
     tlbe.recent = 1;
     *paddr = tlbe.paddr[1] | (vpa & 0xfff);
+    *wi = (PTE2.WIMG & 0b1100) != 0;
 
     return TLBLookupResult::Found;
   }
@@ -1286,14 +1334,14 @@ void InvalidateTLBEntry(u32 address)
 }
 
 // Page Address Translation
-static TranslateAddressResult TranslatePageAddress(const u32 address, const XCheckTLBFlag flag)
+static TranslateAddressResult TranslatePageAddress(const u32 address, const XCheckTLBFlag flag,
+                                                   bool* wi)
 {
   // TLB cache
   // This catches 99%+ of lookups in practice, so the actual page table entry code below doesn't
-  // benefit
-  // much from optimization.
+  // benefit much from optimization.
   u32 translatedAddress = 0;
-  TLBLookupResult res = LookupTLBPageAddress(flag, address, &translatedAddress);
+  TLBLookupResult res = LookupTLBPageAddress(flag, address, &translatedAddress, wi);
   if (res == TLBLookupResult::Found)
     return TranslateAddressResult{TranslateAddressResult::PAGE_TABLE_TRANSLATED,
                                   translatedAddress};
@@ -1368,6 +1416,8 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe
       if (res != TLBLookupResult::UpdateC)
         UpdateTLBEntry(flag, PTE2, address);
 
+      *wi = (PTE2.WIMG & 0b1100) != 0;
+
       return TranslateAddressResult{TranslateAddressResult::PAGE_TABLE_TRANSLATED,
                                     (PTE2.RPN << 12) | offset};
     }
@@ -1379,7 +1429,7 @@ static TranslateAddressResult TranslatePageAddress(const u32 address, const XChe
 static void UpdateBATs(BatTable& bat_table, u32 base_spr)
 {
   // TODO: Separate BATs for MSR.PR==0 and MSR.PR==1
-  // TODO: Handle PP/WIMG settings.
+  // TODO: Handle PP settings.
   // TODO: Check how hardware reacts to overlapping BATs (including
   // BATs which should cause a DSI).
   // TODO: Check how hardware reacts to invalid BATs (bad mask etc).
@@ -1424,19 +1474,38 @@ static void UpdateBATs(BatTable& bat_table, u32 base_spr)
       u32 physical_address = (batl.BRPN | j) << BAT_INDEX_SHIFT;
       u32 virtual_address = (batu.BEPI | j) << BAT_INDEX_SHIFT;
 
-      // The bottom bit is whether the translation is valid; the second
-      // bit from the bottom is whether we can use the fastmem arena.
+      // BAT_MAPPED_BIT is whether the translation is valid
+      // BAT_PHYSICAL_BIT is whether we can use the fastmem arena
+      // BAT_WI_BIT is whether either W or I (of WIMG) is set
       u32 valid_bit = BAT_MAPPED_BIT;
-      if (Memory::m_pFakeVMEM && (physical_address & 0xFE000000) == 0x7E000000)
-        valid_bit |= BAT_PHYSICAL_BIT;
-      else if (physical_address < Memory::GetRamSizeReal())
-        valid_bit |= BAT_PHYSICAL_BIT;
-      else if (Memory::m_pEXRAM && physical_address >> 28 == 0x1 &&
-               (physical_address & 0x0FFFFFFF) < Memory::GetExRamSizeReal())
-        valid_bit |= BAT_PHYSICAL_BIT;
-      else if (physical_address >> 28 == 0xE &&
-               physical_address < 0xE0000000 + Memory::GetL1CacheSize())
-        valid_bit |= BAT_PHYSICAL_BIT;
+
+      const bool wi = (batl.WIMG & 0b1100) != 0;
+      if (wi)
+        valid_bit |= BAT_WI_BIT;
+
+      // Enable fastmem mappings for cached memory. There are quirks related to uncached memory
+      // that fastmem doesn't emulate properly (though no normal games are known to rely on them).
+      if (!wi)
+      {
+        if (Memory::m_pFakeVMEM && (physical_address & 0xFE000000) == 0x7E000000)
+        {
+          valid_bit |= BAT_PHYSICAL_BIT;
+        }
+        else if (physical_address < Memory::GetRamSizeReal())
+        {
+          valid_bit |= BAT_PHYSICAL_BIT;
+        }
+        else if (Memory::m_pEXRAM && physical_address >> 28 == 0x1 &&
+                 (physical_address & 0x0FFFFFFF) < Memory::GetExRamSizeReal())
+        {
+          valid_bit |= BAT_PHYSICAL_BIT;
+        }
+        else if (physical_address >> 28 == 0xE &&
+                 physical_address < 0xE0000000 + Memory::GetL1CacheSize())
+        {
+          valid_bit |= BAT_PHYSICAL_BIT;
+        }
+      }
 
       // Fastmem doesn't support memchecks, so disable it for all overlapping virtual pages.
       if (PowerPC::memchecks.OverlapsMemcheck(virtual_address, BAT_PAGE_SIZE))
@@ -1511,10 +1580,12 @@ void IBATUpdated()
 template <const XCheckTLBFlag flag>
 static TranslateAddressResult TranslateAddress(u32 address)
 {
-  if (TranslateBatAddess(IsOpcodeFlag(flag) ? ibat_table : dbat_table, &address))
-    return TranslateAddressResult{TranslateAddressResult::BAT_TRANSLATED, address};
+  bool wi = false;
 
-  return TranslatePageAddress(address, flag);
+  if (TranslateBatAddess(IsOpcodeFlag(flag) ? ibat_table : dbat_table, &address, &wi))
+    return TranslateAddressResult{TranslateAddressResult::BAT_TRANSLATED, address, wi};
+
+  return TranslatePageAddress(address, flag, &wi);
 }
 
 std::optional<u32> GetTranslatedAddress(u32 address)
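Because WriteToHardware now handles at most four bytes (DEBUG_ASSERT(size <= 4)), every 64-bit path in MMU.cpp is decomposed into two 32-bit writes with the high word at the lower address, and ClearCacheLine steps by 4 instead of 8; HostTryWriteU64 additionally gives up if the first half fails. A minimal standalone sketch of the decomposition, assuming a made-up write32 helper that stores big-endian into a byte buffer:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Made-up stand-in for the 32-bit write path: stores value big-endian at address.
    static void write32(std::vector<std::uint8_t>& mem, std::uint32_t address, std::uint32_t value)
    {
      for (unsigned i = 0; i < 4; i++)
        mem[address + i] = static_cast<std::uint8_t>(value >> (8 * (3 - i)));
    }

    int main()
    {
      std::vector<std::uint8_t> mem(16);
      const std::uint64_t value = 0x1122334455667788;

      // Decomposition used by Write_U64/HostWrite_U64: high word first, low word at address + 4.
      write32(mem, 0, static_cast<std::uint32_t>(value >> 32));
      write32(mem, 4, static_cast<std::uint32_t>(value));

      assert(mem[0] == 0x11 && mem[3] == 0x44 && mem[4] == 0x55 && mem[7] == 0x88);
      return 0;
    }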
diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h
index 9191cea2ab..9e2a3a0196 100644
--- a/Source/Core/Core/PowerPC/MMU.h
+++ b/Source/Core/Core/PowerPC/MMU.h
@@ -86,8 +86,8 @@ HostTryReadString(u32 address, size_t size = 0,
 // Writes a value to emulated memory using the currently active MMU settings.
 // If the write fails (eg. address does not correspond to a mapped address in the current address
 // space), a PanicAlert will be shown to the user.
-void HostWrite_U8(u8 var, u32 address);
-void HostWrite_U16(u16 var, u32 address);
+void HostWrite_U8(u32 var, u32 address);
+void HostWrite_U16(u32 var, u32 address);
 void HostWrite_U32(u32 var, u32 address);
 void HostWrite_U64(u64 var, u32 address);
 void HostWrite_F32(float var, u32 address);
@@ -111,9 +111,9 @@ struct TryWriteResult
 // If the write succeeds, the returned TryWriteResult contains information on whether the given
 // address had to be translated or not. Unlike the HostWrite functions, this does not raise a
 // user-visible alert on failure.
-TryWriteResult HostTryWriteU8(u8 var, const u32 address,
+TryWriteResult HostTryWriteU8(u32 var, const u32 address,
                               RequestedAddressSpace space = RequestedAddressSpace::Effective);
-TryWriteResult HostTryWriteU16(u16 var, const u32 address,
+TryWriteResult HostTryWriteU16(u32 var, const u32 address,
                                RequestedAddressSpace space = RequestedAddressSpace::Effective);
 TryWriteResult HostTryWriteU32(u32 var, const u32 address,
                                RequestedAddressSpace space = RequestedAddressSpace::Effective);
@@ -158,12 +158,12 @@ double Read_F64(u32 address);
 u32 Read_U8_ZX(u32 address);
 u32 Read_U16_ZX(u32 address);
 
-void Write_U8(u8 var, u32 address);
-void Write_U16(u16 var, u32 address);
+void Write_U8(u32 var, u32 address);
+void Write_U16(u32 var, u32 address);
 void Write_U32(u32 var, u32 address);
 void Write_U64(u64 var, u32 address);
 
-void Write_U16_Swap(u16 var, u32 address);
+void Write_U16_Swap(u32 var, u32 address);
 void Write_U32_Swap(u32 var, u32 address);
 void Write_U64_Swap(u64 var, u32 address);
 
@@ -199,16 +199,18 @@ constexpr int BAT_INDEX_SHIFT = 17;
 constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
 constexpr u32 BAT_MAPPED_BIT = 0x1;
 constexpr u32 BAT_PHYSICAL_BIT = 0x2;
-constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x3);
+constexpr u32 BAT_WI_BIT = 0x4;
+constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
 
 using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>;  // 128 KB
 extern BatTable ibat_table;
 extern BatTable dbat_table;
 
-inline bool TranslateBatAddess(const BatTable& bat_table, u32* address)
+inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)
 {
   u32 bat_result = bat_table[*address >> BAT_INDEX_SHIFT];
   if ((bat_result & BAT_MAPPED_BIT) == 0)
     return false;
   *address = (bat_result & BAT_RESULT_MASK) | (*address & (BAT_PAGE_SIZE - 1));
+  *wi = (bat_result & BAT_WI_BIT) != 0;
   return true;
 }
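Each BatTable entry packs a 128 KB-aligned physical base together with three flag bits, which is why BAT_RESULT_MASK widens from ~0x3 to ~0x7. A minimal sketch of how TranslateBatAddess decodes an entry after this change; the constants mirror the MMU.h values above and the example entry value is made up:

    #include <cstdint>

    // These mirror the MMU.h constants above.
    constexpr std::uint32_t BAT_PAGE_SIZE = 1 << 17;
    constexpr std::uint32_t BAT_MAPPED_BIT = 0x1;
    constexpr std::uint32_t BAT_WI_BIT = 0x4;
    constexpr std::uint32_t BAT_RESULT_MASK = ~std::uint32_t{0x7};

    int main()
    {
      const std::uint32_t address = 0x80000040;  // example effective address
      const std::uint32_t bat_result = 0x01700000 | BAT_MAPPED_BIT | BAT_WI_BIT;  // made-up table entry
      const std::uint32_t translated = (bat_result & BAT_RESULT_MASK) | (address & (BAT_PAGE_SIZE - 1));
      const bool wi = (bat_result & BAT_WI_BIT) != 0;
      return (translated == 0x01700040 && wi) ? 0 : 1;
    }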