mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
JIT: use fastmem loads in MMU mode.
Even in games that require MMU mode, loads outside the area specified by the BAT are rare, so fastmem is a substantial improvement. All of the interesting changes are in the backpatch handler, to make it generate DSI exceptions correctly.
This commit is contained in:
parent
7ebca647b3
commit
43d56febc4
@ -96,6 +96,13 @@ u16 Read_U16(const u32 _Address);
|
||||
u32 Read_U32(const u32 _Address);
|
||||
u64 Read_U64(const u32 _Address);
|
||||
|
||||
u32 Read_S8_Val(u32 address, u32 val);
|
||||
u32 Read_U8_Val(u32 address, u32 val);
|
||||
u32 Read_S16_Val(u32 address, u32 val);
|
||||
u32 Read_U16_Val(u32 address, u32 val);
|
||||
u32 Read_U32_Val(u32 address, u32 val);
|
||||
u64 Read_U64_Val(u32 address, u64 val);
|
||||
|
||||
// Useful helper functions, used by ARM JIT
|
||||
float Read_F32(const u32 _Address);
|
||||
double Read_F64(const u32 _Address);
|
||||
|
@ -57,10 +57,12 @@ GXPeekZ
|
||||
// ----------------
|
||||
|
||||
// Overloaded byteswap functions, for use within the templated functions below.
|
||||
inline u8 bswap(u8 val) {return val;}
|
||||
inline u16 bswap(u16 val) {return Common::swap16(val);}
|
||||
inline u32 bswap(u32 val) {return Common::swap32(val);}
|
||||
inline u64 bswap(u64 val) {return Common::swap64(val);}
|
||||
// Byteswap overload set, one overload per access width/signedness, so that
// templated callers can write bswap(x) for any of these integer types.
// Single-byte values have no byte order, so the 8-bit overloads return the
// argument unchanged.
inline u8 bswap(u8 val) { return val; }
|
||||
inline s8 bswap(s8 val) { return val; }
|
||||
inline u16 bswap(u16 val) { return Common::swap16(val); }
|
||||
// Signed 16-bit: goes through Common::swap16 and converts the result back to s16.
inline s16 bswap(s16 val) { return Common::swap16(val); }
|
||||
inline u32 bswap(u32 val) { return Common::swap32(val); }
|
||||
inline u64 bswap(u64 val) { return Common::swap64(val); }
|
||||
// =================
|
||||
|
||||
|
||||
@ -89,8 +91,8 @@ static u32 EFB_Read(const u32 addr)
|
||||
|
||||
static void GenerateDSIException(u32 _EffectiveAddress, bool _bWrite);
|
||||
|
||||
template <typename T>
|
||||
inline void ReadFromHardware(T &_var, const u32 em_address, Memory::XCheckTLBFlag flag)
|
||||
template <typename T, typename U>
|
||||
inline void ReadFromHardware(U &_var, const u32 em_address, Memory::XCheckTLBFlag flag)
|
||||
{
|
||||
// TODO: Figure out the fastest order of tests for both read and write (they are probably different).
|
||||
if ((em_address & 0xC8000000) == 0xC8000000)
|
||||
@ -98,7 +100,7 @@ inline void ReadFromHardware(T &_var, const u32 em_address, Memory::XCheckTLBFla
|
||||
if (em_address < 0xcc000000)
|
||||
_var = EFB_Read(em_address);
|
||||
else
|
||||
_var = mmio_mapping->Read<T>(em_address);
|
||||
_var = (T)mmio_mapping->Read<typename std::make_unsigned<T>::type>(em_address);
|
||||
}
|
||||
else if (((em_address & 0xF0000000) == 0x80000000) ||
|
||||
((em_address & 0xF0000000) == 0xC0000000) ||
|
||||
@ -449,6 +451,42 @@ float Read_F32(const u32 _Address)
|
||||
return cvt.d;
|
||||
}
|
||||
|
||||
// Performs an unsigned 8-bit read at `address` via ReadFromHardware<u8> with
// FLAG_READ, using `val` as the by-reference destination, then returns `val`.
// NOTE(review): presumably `val` is handed back unchanged when the access
// faults (DSI exception), which is why the caller supplies the previous
// register value - confirm against the full ReadFromHardware body.
u32 Read_U8_Val(u32 address, u32 val)
|
||||
{
|
||||
ReadFromHardware<u8>(val, address, FLAG_READ);
|
||||
return val;
|
||||
}
|
||||
|
||||
// Signed 8-bit variant: reads at `address` via ReadFromHardware<s8> with
// FLAG_READ, using `val` as the by-reference destination, then returns `val`.
// NOTE(review): the signed access type is presumably what makes the result
// sign-extended into the 32-bit return value, and `val` presumably stays
// untouched if the read faults - confirm against ReadFromHardware.
u32 Read_S8_Val(u32 address, u32 val)
|
||||
{
|
||||
ReadFromHardware<s8>(val, address, FLAG_READ);
|
||||
return val;
|
||||
}
|
||||
|
||||
// Performs an unsigned 16-bit read at `address` via ReadFromHardware<u16> with
// FLAG_READ, using `val` as the by-reference destination, then returns `val`.
// NOTE(review): presumably `val` is returned unchanged when the access faults
// (DSI exception) - confirm against the full ReadFromHardware body.
u32 Read_U16_Val(u32 address, u32 val)
|
||||
{
|
||||
ReadFromHardware<u16>(val, address, FLAG_READ);
|
||||
return val;
|
||||
}
|
||||
|
||||
// Signed 16-bit variant: reads at `address` via ReadFromHardware<s16> with
// FLAG_READ, using `val` as the by-reference destination, then returns `val`.
// NOTE(review): the signed access type is presumably what makes the result
// sign-extended into the 32-bit return value, and `val` presumably stays
// untouched if the read faults - confirm against ReadFromHardware.
u32 Read_S16_Val(u32 address, u32 val)
|
||||
{
|
||||
ReadFromHardware<s16>(val, address, FLAG_READ);
|
||||
return val;
|
||||
}
|
||||
|
||||
// Performs a 32-bit read at `address` via ReadFromHardware<u32> with
// FLAG_READ, using `val` as the by-reference destination, then returns `val`.
// NOTE(review): presumably `val` is returned unchanged when the access faults
// (DSI exception) - confirm against the full ReadFromHardware body.
u32 Read_U32_Val(u32 address, u32 val)
|
||||
{
|
||||
ReadFromHardware<u32>(val, address, FLAG_READ);
|
||||
return val;
|
||||
}
|
||||
|
||||
// 64-bit variant: both `val` and the return value are u64. Reads at `address`
// via ReadFromHardware<u64> with FLAG_READ, using `val` as the by-reference
// destination, then returns `val`.
// NOTE(review): presumably `val` is returned unchanged when the access faults
// (DSI exception) - confirm against the full ReadFromHardware body.
u64 Read_U64_Val(u32 address, u64 val)
|
||||
{
|
||||
ReadFromHardware<u64>(val, address, FLAG_READ);
|
||||
return val;
|
||||
}
|
||||
|
||||
u32 Read_U8_ZX(const u32 _Address)
|
||||
{
|
||||
return (u32)Read_U8(_Address);
|
||||
|
@ -444,7 +444,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
const u8* loadPairedFloatTwo = AlignCode4();
|
||||
if (jit->js.memcheck)
|
||||
{
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 64, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 64, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
ROL(64, R(RSCRATCH_EXTRA), Imm8(32));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
}
|
||||
@ -464,7 +464,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
const u8* loadPairedFloatOne = AlignCode4();
|
||||
if (jit->js.memcheck)
|
||||
{
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
}
|
||||
@ -486,7 +486,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
if (jit->js.memcheck)
|
||||
{
|
||||
// TODO: Support not swapping in safeLoadToReg to avoid bswapping twice
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
ROR(16, R(RSCRATCH_EXTRA), Imm8(8));
|
||||
}
|
||||
else
|
||||
@ -512,7 +512,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
|
||||
const u8* loadPairedU8One = AlignCode4();
|
||||
if (jit->js.memcheck)
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
else
|
||||
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx
|
||||
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
|
||||
@ -525,7 +525,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
if (jit->js.memcheck)
|
||||
{
|
||||
// TODO: Support not swapping in safeLoadToReg to avoid bswapping twice
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
ROR(16, R(RSCRATCH_EXTRA), Imm8(8));
|
||||
}
|
||||
else
|
||||
@ -551,7 +551,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
|
||||
const u8* loadPairedS8One = AlignCode4();
|
||||
if (jit->js.memcheck)
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
else
|
||||
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true);
|
||||
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
|
||||
@ -563,7 +563,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
const u8* loadPairedU16Two = AlignCode4();
|
||||
// TODO: Support not swapping in (un)safeLoadToReg to avoid bswapping twice
|
||||
if (jit->js.memcheck)
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
else
|
||||
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
|
||||
ROL(32, R(RSCRATCH_EXTRA), Imm8(16));
|
||||
@ -585,7 +585,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
|
||||
const u8* loadPairedU16One = AlignCode4();
|
||||
if (jit->js.memcheck)
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
else
|
||||
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false);
|
||||
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
|
||||
@ -596,7 +596,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
|
||||
const u8* loadPairedS16Two = AlignCode4();
|
||||
if (jit->js.memcheck)
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
else
|
||||
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
|
||||
ROL(32, R(RSCRATCH_EXTRA), Imm8(16));
|
||||
@ -618,7 +618,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
||||
|
||||
const u8* loadPairedS16One = AlignCode4();
|
||||
if (jit->js.memcheck)
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_PROLOG);
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG);
|
||||
else
|
||||
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true);
|
||||
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
|
||||
|
@ -85,9 +85,25 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||
else
|
||||
bswapNopCount = 2;
|
||||
|
||||
int totalSize = info.instructionSize + bswapNopCount;
|
||||
// A 16-bit load that was not byteswapped in place is expected to be followed
// by a shift-by-16 that moves the value into the low half of the register.
// That shift must be folded into the patched region, and it also tells us
// whether the original load was sign- or zero-extending.
if (info.operandSize == 2 && !info.byteSwap)
{
	// Skip an optional REX prefix (0x40-0x4F) in front of the shift.
	if ((codePtr[totalSize] & 0xF0) == 0x40)
	{
		++totalSize;
	}

	// Expect opcode 0xC1 (shift group, imm8 form) with an immediate of 16.
	if (codePtr[totalSize] != 0xc1 || codePtr[totalSize + 2] != 0x10)
	{
		PanicAlert("BackPatch: didn't find expected shift %p", codePtr);
		// BackPatch returns bool; report the failure explicitly instead of
		// relying on a nullptr -> bool conversion.
		return false;
	}

	// Bit 4 of the ModRM byte separates SAR (/7, arithmetic) from SHR (/5,
	// logical), i.e. a sign-extending load from a zero-extending one.
	info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;

	// Opcode + ModRM + imm8.
	totalSize += 3;
}
|
||||
|
||||
const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse);
|
||||
emitter.CALL((void *)trampoline);
|
||||
int padding = info.instructionSize + bswapNopCount - BACKPATCH_SIZE;
|
||||
int padding = totalSize - BACKPATCH_SIZE;
|
||||
if (padding > 0)
|
||||
{
|
||||
emitter.NOP(padding);
|
||||
|
@ -296,8 +296,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
||||
{
|
||||
registersInUse[reg_value] = false;
|
||||
}
|
||||
if (!jit->js.memcheck &&
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem &&
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem &&
|
||||
!opAddress.IsImm() &&
|
||||
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM))
|
||||
#ifdef ENABLE_MEM_CHECK
|
||||
|
@ -57,40 +57,55 @@ const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo &info, B
|
||||
const u8* trampoline = GetCodePtr();
|
||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
registersInUse[addrReg] = true;
|
||||
registersInUse[dataReg] = false;
|
||||
|
||||
// It's a read. Easy.
|
||||
// RSP alignment here is 8 due to the call.
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 8);
|
||||
|
||||
if (addrReg != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R(addrReg));
|
||||
int dataRegSize = info.operandSize == 8 ? 64 : 32;
|
||||
|
||||
if (dataReg == ABI_PARAM1)
|
||||
{
|
||||
if (addrReg == ABI_PARAM2)
|
||||
{
|
||||
XCHG(dataRegSize, R(ABI_PARAM1), R(ABI_PARAM2));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(dataRegSize, R(ABI_PARAM2), R(dataReg));
|
||||
MOV(32, R(ABI_PARAM1), R(addrReg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (addrReg != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R(addrReg));
|
||||
if (dataReg != ABI_PARAM2)
|
||||
MOV(dataRegSize, R(ABI_PARAM2), R(dataReg));
|
||||
}
|
||||
|
||||
if (info.displacement)
|
||||
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
|
||||
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 8:
|
||||
CALL((void *)&Memory::Read_U64_Val);
|
||||
break;
|
||||
case 4:
|
||||
CALL((void *)&Memory::Read_U32);
|
||||
CALL((void *)&Memory::Read_U32_Val);
|
||||
break;
|
||||
case 2:
|
||||
CALL((void *)&Memory::Read_U16);
|
||||
SHL(32, R(ABI_RETURN), Imm8(16));
|
||||
CALL(info.signExtend ? (void *)&Memory::Read_S16_Val : (void *)&Memory::Read_U16_Val);
|
||||
break;
|
||||
case 1:
|
||||
CALL((void *)&Memory::Read_U8);
|
||||
CALL(info.signExtend ? (void *)&Memory::Read_S8_Val : (void *)&Memory::Read_U8_Val);
|
||||
break;
|
||||
}
|
||||
|
||||
if (info.signExtend && info.operandSize == 1)
|
||||
{
|
||||
// Need to sign extend value from Read_U8.
|
||||
MOVSX(32, 8, dataReg, R(ABI_RETURN));
|
||||
}
|
||||
else if (dataReg != EAX)
|
||||
{
|
||||
MOV(32, R(dataReg), R(ABI_RETURN));
|
||||
}
|
||||
MOV(dataRegSize, R(dataReg), R(ABI_RETURN));
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 8);
|
||||
RET();
|
||||
|
Loading…
x
Reference in New Issue
Block a user