diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index c22d2b6a67..603f1dc10b 100644 --- a/Source/Core/Core/HW/Memmap.h +++ b/Source/Core/Core/HW/Memmap.h @@ -96,13 +96,6 @@ u16 Read_U16(const u32 address); u32 Read_U32(const u32 address); u64 Read_U64(const u32 address); -u32 Read_S8_Val(const u32 address, u32 var); -u32 Read_U8_Val(const u32 address, u32 var); -u32 Read_S16_Val(const u32 address, u32 var); -u32 Read_U16_Val(const u32 address, u32 var); -u32 Read_U32_Val(const u32 address, u32 var); -u64 Read_U64_Val(const u32 address, u64 var); - // Useful helper functions, used by ARM JIT float Read_F32(const u32 address); double Read_F64(const u32 address); diff --git a/Source/Core/Core/HW/MemmapFunctions.cpp b/Source/Core/Core/HW/MemmapFunctions.cpp index 04736a650c..8fbe067a9f 100644 --- a/Source/Core/Core/HW/MemmapFunctions.cpp +++ b/Source/Core/Core/HW/MemmapFunctions.cpp @@ -92,8 +92,8 @@ static u32 EFB_Read(const u32 addr) static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite); -template -__forceinline void ReadFromHardware(U &_var, const u32 em_address) +template +__forceinline T ReadFromHardware(const u32 em_address) { int segment = em_address >> 28; // Quick check for an address that can't meet any of the following conditions, @@ -104,33 +104,28 @@ __forceinline void ReadFromHardware(U &_var, const u32 em_address) if ((em_address & 0xC8000000) == 0xC8000000) { if (em_address < 0xcc000000) - _var = EFB_Read(em_address); + return EFB_Read(em_address); else - _var = (T)mmio_mapping->Read::type>(em_address); - return; + return (T)mmio_mapping->Read::type>(em_address); } else if (segment == 0x8 || segment == 0xC || segment == 0x0) { - _var = bswap((*(const T*)&m_pRAM[em_address & RAM_MASK])); - return; + return bswap((*(const T*)&m_pRAM[em_address & RAM_MASK])); } else if (m_pEXRAM && (segment == 0x9 || segment == 0xD || segment == 0x1)) { - _var = bswap((*(const T*)&m_pEXRAM[em_address & EXRAM_MASK])); - return; + return bswap((*(const T*)&m_pEXRAM[em_address & EXRAM_MASK])); } else if (segment == 0xE && (em_address < (0xE0000000 + L1_CACHE_SIZE))) { - _var = bswap((*(const T*)&m_pL1Cache[em_address & L1_CACHE_MASK])); - return; + return bswap((*(const T*)&m_pL1Cache[em_address & L1_CACHE_MASK])); } } if (bFakeVMEM && (segment == 0x7 || segment == 0x4)) { // fake VMEM - _var = bswap((*(const T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK])); - return; + return bswap((*(const T*)&m_pFakeVMEM[em_address & FAKEVMEM_MASK])); } // MMU: Do page table translation @@ -139,7 +134,7 @@ __forceinline void ReadFromHardware(U &_var, const u32 em_address) { if (flag == FLAG_READ) GenerateDSIException(em_address, false); - return; + return 0; } // Handle loads that cross page boundaries (ewwww) @@ -157,20 +152,20 @@ __forceinline void ReadFromHardware(U &_var, const u32 em_address) { if (flag == FLAG_READ) GenerateDSIException(em_address_next_page, false); - return; + return 0; } - _var = 0; + T var = 0; for (u32 addr = em_address; addr < em_address + sizeof(T); addr++, tlb_addr++) { if (addr == em_address_next_page) tlb_addr = tlb_addr_next_page; - _var = (_var << 8) | Memory::base[tlb_addr]; + var = (var << 8) | Memory::base[tlb_addr]; } - return; + return var; } // The easy case! - _var = bswap(*(const T*)&Memory::base[tlb_addr]); + return bswap(*(const T*)&Memory::base[tlb_addr]); } @@ -331,32 +326,28 @@ static __forceinline void Memcheck(u32 address, u32 var, bool write, int size) u8 Read_U8(const u32 address) { - u8 var = 0; - ReadFromHardware(var, address); + u8 var = ReadFromHardware(address); Memcheck(address, var, false, 1); return (u8)var; } u16 Read_U16(const u32 address) { - u16 var = 0; - ReadFromHardware(var, address); + u16 var = ReadFromHardware(address); Memcheck(address, var, false, 2); return (u16)var; } u32 Read_U32(const u32 address) { - u32 var = 0; - ReadFromHardware(var, address); + u32 var = ReadFromHardware(address); Memcheck(address, var, false, 4); return var; } u64 Read_U64(const u32 address) { - u64 var = 0; - ReadFromHardware(var, address); + u64 var = ReadFromHardware(address); Memcheck(address, (u32)var, false, 8); return var; } @@ -385,48 +376,6 @@ float Read_F32(const u32 address) return cvt.d; } -u32 Read_U8_Val(const u32 address, u32 var) -{ - ReadFromHardware(var, address); - Memcheck(address, var, false, 1); - return var; -} - -u32 Read_S8_Val(const u32 address, u32 var) -{ - ReadFromHardware(var, address); - Memcheck(address, var, false, 1); - return var; -} - -u32 Read_U16_Val(const u32 address, u32 var) -{ - ReadFromHardware(var, address); - Memcheck(address, var, false, 2); - return var; -} - -u32 Read_S16_Val(const u32 address, u32 var) -{ - ReadFromHardware(var, address); - Memcheck(address, var, false, 2); - return var; -} - -u32 Read_U32_Val(const u32 address, u32 var) -{ - ReadFromHardware(var, address); - Memcheck(address, var, false, 4); - return var; -} - -u64 Read_U64_Val(const u32 address, u64 var) -{ - ReadFromHardware(var, address); - Memcheck(address, (u32)var, false, 8); - return var; -} - u32 Read_U8_ZX(const u32 address) { return (u32)Read_U8(address); @@ -489,16 +438,14 @@ void Write_F64(const double var, const u32 address) } u8 ReadUnchecked_U8(const u32 address) { - u8 var = 0; - ReadFromHardware(var, address); + u8 var = ReadFromHardware(address); return var; } u32 ReadUnchecked_U32(const u32 address) { - u32 var = 0; - ReadFromHardware(var, address); + u32 var = ReadFromHardware(address); return var; } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 436364d0de..e10256f491 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -615,6 +615,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc js.downcountAmount += opinfo->numCycles; js.fastmemLoadStore = NULL; js.fixupExceptionHandler = false; + js.revertGprLoad = -1; + js.revertFprLoad = -1; if (i == (code_block.m_num_instructions - 1)) { @@ -787,8 +789,14 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc exceptionHandlerAtLoc[js.fastmemLoadStore] = GetWritableCodePtr(); } - gpr.Flush(FLUSH_MAINTAIN_STATE); - fpr.Flush(FLUSH_MAINTAIN_STATE); + BitSet32 gprToFlush = BitSet32::AllTrue(32); + BitSet32 fprToFlush = BitSet32::AllTrue(32); + if (js.revertGprLoad >= 0) + gprToFlush[js.revertGprLoad] = false; + if (js.revertFprLoad >= 0) + fprToFlush[js.revertFprLoad] = false; + gpr.Flush(FLUSH_MAINTAIN_STATE, gprToFlush); + fpr.Flush(FLUSH_MAINTAIN_STATE, fprToFlush); // If a memory exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index f91694ba9e..334c46379e 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -401,7 +401,7 @@ void FPURegCache::StoreRegister(size_t preg, OpArg newLoc) emit->MOVAPD(newLoc, regs[preg].location.GetSimpleReg()); } -void RegCache::Flush(FlushMode mode) +void RegCache::Flush(FlushMode mode, BitSet32 regsToFlush) { for (unsigned int i = 0; i < xregs.size(); i++) { @@ -409,7 +409,7 @@ void RegCache::Flush(FlushMode mode) PanicAlert("Someone forgot to unlock X64 reg %u", i); } - for (unsigned int i = 0; i < regs.size(); i++) + for (unsigned int i : regsToFlush) { if (regs[i].locked) { diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h index 3943e83852..0e2f2ea687 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.h @@ -81,7 +81,7 @@ public: LockX(reg1); LockX(reg2); } - void Flush(FlushMode mode = FLUSH_ALL); + void Flush(FlushMode mode = FLUSH_ALL, BitSet32 regsToFlush = BitSet32::AllTrue(32)); void Flush(PPCAnalyst::CodeOp *op) {Flush();} int SanityCheck() const; void KillImmediate(size_t preg, bool doLoad, bool makeDirty); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 1edd6b3868..c322c2248f 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -246,9 +246,23 @@ void Jit64::lXXx(UGeckoInstruction inst) } gpr.Lock(a, b, d); + if (update && storeAddress) gpr.BindToRegister(a, true, true); - gpr.BindToRegister(d, js.memcheck, true); + + // A bit of an evil hack here. We need to retain the original value of this register for the + // exception path, but we'd rather not needlessly pass it around if we don't have to, since + // the exception path is very rare. So we store the value in the regcache, let the load path + // clobber it, then restore the value in the exception path. + // TODO: no other load has to do this at the moment, since no other loads go directly to the + // target registers, but if that ever changes, we need to do it there too. + if (js.memcheck) + { + gpr.StoreFromRegister(d); + js.revertGprLoad = d; + } + gpr.BindToRegister(d, false, true); + BitSet32 registersInUse = CallerSavedRegistersInUse(); // We need to save the (usually scratch) address register for the update. if (update && storeAddress) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 4cfbc3b756..bc61136a6c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -66,7 +66,12 @@ void Jit64::lfXXX(UGeckoInstruction inst) } fpr.Lock(d); - fpr.BindToRegister(d, js.memcheck || !single); + if (js.memcheck && single) + { + fpr.StoreFromRegister(d); + js.revertFprLoad = d; + } + fpr.BindToRegister(d, !single); BitSet32 registersInUse = CallerSavedRegistersInUse(); if (update && js.memcheck) registersInUse[RSCRATCH2] = true; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 5a526f8f48..cb79f3f511 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -79,6 +79,10 @@ protected: // so just fixup that branch instead of testing for a DSI again. bool fixupExceptionHandler; Gen::FixupBranch exceptionHandler; + // If these are set, we've stored the old value of a register which will be loaded in revertLoad, + // which lets us revert it on the exception path. + int revertGprLoad; + int revertFprLoad; bool firstFPInstructionFound; bool isLastInstruction; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index fef3e90677..1209e2bd46 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -302,10 +302,7 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(OpArg reg_value, X64Reg reg_addr, B void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, BitSet32 registersInUse, bool signExtend, int flags) { - if (!jit->js.memcheck) - { - registersInUse[reg_value] = false; - } + registersInUse[reg_value] = false; if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && !opAddress.IsImm() && !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp index b91a0f13ca..f5bbea78dc 100644 --- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp @@ -42,39 +42,58 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B const u8* trampoline = GetCodePtr(); X64Reg addrReg = (X64Reg)info.scaledReg; X64Reg dataReg = (X64Reg)info.regOperandReg; - registersInUse[addrReg] = true; - registersInUse[dataReg] = false; + int stack_offset = 0; + bool push_param1 = registersInUse[ABI_PARAM1]; - ABI_PushRegistersAndAdjustStack(registersInUse, 0); + if (push_param1) + { + PUSH(ABI_PARAM1); + stack_offset = 8; + registersInUse[ABI_PARAM1] = 0; + } int dataRegSize = info.operandSize == 8 ? 64 : 32; - MOVTwo(dataRegSize, ABI_PARAM1, addrReg, info.displacement, ABI_PARAM2, dataReg); + if (addrReg != ABI_PARAM1 && info.displacement) + LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement)); + else if (addrReg != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R(addrReg)); + else if (info.displacement) + ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); + + ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset); switch (info.operandSize) { case 8: - CALL((void *)&Memory::Read_U64_Val); + CALL((void *)&Memory::Read_U64); break; case 4: - CALL((void *)&Memory::Read_U32_Val); + CALL((void *)&Memory::Read_U32); break; case 2: - CALL(info.signExtend ? (void *)&Memory::Read_S16_Val : (void *)&Memory::Read_U16_Val); + CALL((void *)&Memory::Read_U16); break; case 1: - CALL(info.signExtend ? (void *)&Memory::Read_S8_Val : (void *)&Memory::Read_U8_Val); + CALL((void *)&Memory::Read_U8); break; } - if (dataReg != ABI_RETURN) - MOV(dataRegSize, R(dataReg), R(ABI_RETURN)); + ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset); + + if (push_param1) + POP(ABI_PARAM1); - ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (exceptionHandler) { TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); J_CC(CC_NZ, exceptionHandler); } + + if (info.signExtend) + MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN)); + else if (dataReg != ABI_RETURN || info.operandSize < 4) + MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN)); + JMP(returnPtr, true); return trampoline; }