diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt
index 6c007f68be..691a162c98 100644
--- a/Source/Core/Core/CMakeLists.txt
+++ b/Source/Core/Core/CMakeLists.txt
@@ -209,9 +209,10 @@ if(_M_X86)
             PowerPC/Jit64/JitRegCache.cpp
             PowerPC/Jit64/Jit_SystemRegisters.cpp
             PowerPC/Jit64Common/BlockCache.cpp
+            PowerPC/Jit64Common/EmuCodeBlock.cpp
+            PowerPC/Jit64Common/FarCodeCache.cpp
             PowerPC/Jit64Common/Jit64AsmCommon.cpp
             PowerPC/Jit64Common/Jit64Base.cpp
-            PowerPC/Jit64Common/Jit64Util.cpp
             PowerPC/Jit64Common/TrampolineCache.cpp)
 elseif(_M_ARM_64)
   set(SRCS ${SRCS}
diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj
index 274bcd7edd..0b00d6134d 100644
--- a/Source/Core/Core/Core.vcxproj
+++ b/Source/Core/Core/Core.vcxproj
@@ -238,9 +238,10 @@
+
+
-
@@ -433,10 +434,13 @@
+
+
-
+
+
diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters
index 1df6538b86..2bc35664f3 100644
--- a/Source/Core/Core/Core.vcxproj.filters
+++ b/Source/Core/Core/Core.vcxproj.filters
@@ -735,15 +735,18 @@
       PowerPC\Jit64Common
+
+      PowerPC\Jit64Common
+
+
+      PowerPC\Jit64Common
+
       PowerPC\Jit64Common
       PowerPC\Jit64Common
-
-      PowerPC\Jit64Common
-
       PowerPC\Jit64Common
@@ -1281,18 +1284,27 @@
       PowerPC\Jit64Common
+
+      PowerPC\Jit64Common
+
+
+      PowerPC\Jit64Common
+
       PowerPC\Jit64Common
       PowerPC\Jit64Common
-
+
       PowerPC\Jit64Common
       PowerPC\Jit64Common
+
+      PowerPC\Jit64Common
+
       IPC HLE %28IOS/Starlet%29\USB
diff --git a/Source/Core/Core/PowerPC/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter.cpp
index 0173822147..a1f3c7ce08 100644
--- a/Source/Core/Core/PowerPC/CachedInterpreter.cpp
+++ b/Source/Core/Core/PowerPC/CachedInterpreter.cpp
@@ -10,7 +10,7 @@
 #include "Core/HLE/HLE.h"
 #include "Core/HW/CPU.h"
 #include "Core/PowerPC/Gekko.h"
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
+#include "Core/PowerPC/Jit64Common/Jit64Base.h"
 #include "Core/PowerPC/PPCAnalyst.h"
 #include "Core/PowerPC/PowerPC.h"
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 6b88e5ecb2..1f85560234 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -26,7 +26,9 @@
 #include "Core/PowerPC/Jit64/Jit64_Tables.h"
 #include "Core/PowerPC/Jit64/JitAsm.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
+#include "Core/PowerPC/Jit64Common/FarCodeCache.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+#include "Core/PowerPC/Jit64Common/TrampolineCache.h"
 #include "Core/PowerPC/JitInterface.h"
 #include "Core/PowerPC/PowerPC.h"
 #include "Core/PowerPC/Profiler.h"
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
index f0e5f2feb0..5e7bc094ae 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -12,6 +12,7 @@
 #include "Core/HW/CPU.h"
 #include "Core/HW/Memmap.h"
 #include "Core/PowerPC/Jit64/JitAsm.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
 #include "Core/PowerPC/PowerPC.h"
 
 using namespace Gen;
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
index c3dc984c5d..278d6a126c 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
@@ -14,7 +14,7 @@
 #include "Common/x64Emitter.h"
 #include "Core/PowerPC/Jit64/Jit.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PowerPC.h" using namespace Gen; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index e7ea90f614..abe3d5a11a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -9,6 +9,7 @@ #include "Core/CoreTiming.h" #include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 65339e162d..87976954dd 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -13,6 +13,7 @@ #include "Core/Core.h" #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/JitRegCache.h" +#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index a8cd8a24d4..8bf3b6c87c 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -6,12 +6,13 @@ #include #include "Common/Assert.h" +#include "Common/CPUDetect.h" #include "Common/CommonTypes.h" #include "Common/MathUtil.h" #include "Common/x64Emitter.h" #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/JitRegCache.h" -#include "Core/PowerPC/Jit64Common/Jit64Util.h" +#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 973cc856cd..d3c6dfe767 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -17,7 +17,6 @@ #include "Core/HW/DSP.h" #include "Core/HW/Memmap.h" #include "Core/PowerPC/Jit64/JitRegCache.h" -#include "Core/PowerPC/Jit64Common/Jit64Util.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 2c6d29003f..7b6511a770 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -7,7 +7,7 @@ #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" #include "Core/PowerPC/Jit64/JitRegCache.h" -#include "Core/PowerPC/Jit64Common/Jit64Util.h" +#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" using namespace Gen; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index c3f18e8a85..b260de56dc 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -10,7 +10,7 @@ #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" #include "Core/PowerPC/Jit64/JitRegCache.h" -#include "Core/PowerPC/Jit64Common/Jit64Util.h" +#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/PowerPC.h" diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp 
index 2f262540bd..dbb96a8394 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
@@ -4,12 +4,13 @@
 #include "Core/PowerPC/Jit64/Jit.h"
 
 #include "Common/BitSet.h"
+#include "Common/CPUDetect.h"
 #include "Common/CommonTypes.h"
 #include "Common/x64Emitter.h"
 #include "Core/CoreTiming.h"
 #include "Core/HW/ProcessorInterface.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
 #include "Core/PowerPC/PowerPC.h"
 
 using namespace Gen;
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64Util.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
similarity index 95%
rename from Source/Core/Core/PowerPC/Jit64Common/Jit64Util.cpp
rename to Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
index 9db6e5b919..0705058e1d 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64Util.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
@@ -1,22 +1,45 @@
-// Copyright 2008 Dolphin Emulator Project
+// Copyright 2016 Dolphin Emulator Project
 // Licensed under GPLv2+
 // Refer to the license.txt file included.
 
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
-#include "Common/BitSet.h"
-#include "Common/CommonTypes.h"
+#include "Core/PowerPC/Jit64Common/EmuCodeBlock.h"
+
+#include "Common/Assert.h"
+#include "Common/CPUDetect.h"
 #include "Common/Intrinsics.h"
 #include "Common/MathUtil.h"
-#include "Common/x64ABI.h"
-#include "Common/x64Emitter.h"
 #include "Core/HW/MMIO.h"
 #include "Core/HW/Memmap.h"
+#include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/Jit64Common/Jit64Base.h"
-#include "Core/PowerPC/Jit64Common/TrampolineCache.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
 #include "Core/PowerPC/PowerPC.h"
 
 using namespace Gen;
 
+namespace
+{
+OpArg SwapImmediate(int access_size, const OpArg& reg_value)
+{
+  if (access_size == 32)
+    return Imm32(Common::swap32(reg_value.Imm32()));
+
+  if (access_size == 16)
+    return Imm16(Common::swap16(reg_value.Imm16()));
+
+  return Imm8(reg_value.Imm8());
+}
+
+OpArg FixImmediate(int access_size, OpArg arg)
+{
+  if (arg.IsImm())
+  {
+    arg = access_size == 8 ? arg.AsImm8() : access_size == 16 ? arg.AsImm16() : arg.AsImm32();
+  }
+  return arg;
+}
+}  // Anonymous namespace
+
 void EmuCodeBlock::MemoryExceptionCheck()
 {
   // TODO: We really should untangle the trampolines, exception handlers and
@@ -47,6 +70,49 @@ void EmuCodeBlock::MemoryExceptionCheck()
   }
 }
 
+void EmuCodeBlock::SwitchToFarCode()
+{
+  nearcode = GetWritableCodePtr();
+  SetCodePtr(farcode.GetWritableCodePtr());
+}
+
+void EmuCodeBlock::SwitchToNearCode()
+{
+  farcode.SetCodePtr(GetWritableCodePtr());
+  SetCodePtr(nearcode);
+}
+
+FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr,
+                                             BitSet32 registers_in_use)
+{
+  registers_in_use[reg_addr] = true;
+  if (reg_value.IsSimpleReg())
+    registers_in_use[reg_value.GetSimpleReg()] = true;
+
+  // Get ourselves a free register; try to pick one that doesn't involve pushing, if we can.
+  X64Reg scratch = RSCRATCH;
+  if (!registers_in_use[RSCRATCH])
+    scratch = RSCRATCH;
+  else if (!registers_in_use[RSCRATCH_EXTRA])
+    scratch = RSCRATCH_EXTRA;
+  else
+    scratch = reg_addr;
+
+  if (scratch == reg_addr)
+    PUSH(scratch);
+  else
+    MOV(32, R(scratch), R(reg_addr));
+
+  // Perform lookup to see if we can use fast path.
+  SHR(32, R(scratch), Imm8(PowerPC::BAT_INDEX_SHIFT));
+  TEST(32, MScaled(scratch, SCALE_4, PtrOffset(&PowerPC::dbat_table[0])), Imm32(2));
+
+  if (scratch == reg_addr)
+    POP(scratch);
+
+  return J_CC(CC_Z, farcode.Enabled());
+}
+
 void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset,
                                       bool signExtend)
 {
@@ -63,6 +129,38 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
   MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
 }
 
+void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
+                                       bool swap, MovInfo* info)
+{
+  if (info)
+  {
+    info->address = GetWritableCodePtr();
+    info->nonAtomicSwapStore = false;
+  }
+
+  OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
+  if (reg_value.IsImm())
+  {
+    if (swap)
+      reg_value = SwapImmediate(accessSize, reg_value);
+    MOV(accessSize, dest, reg_value);
+  }
+  else if (swap)
+  {
+    SwapAndStore(accessSize, dest, reg_value.GetSimpleReg(), info);
+  }
+  else
+  {
+    MOV(accessSize, dest, reg_value);
+  }
+}
+
+void EmuCodeBlock::UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
+                                       s32 offset, bool swap, Gen::MovInfo* info)
+{
+  UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap, info);
+}
+
 bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
                                    bool signExtend, MovInfo* info)
 {
@@ -104,6 +202,28 @@ bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int access
   return offsetAddedToAddress;
 }
 
+void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize)
+{
+  // No need to protect these, they don't touch any state
+  // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
+  switch (accessSize)
+  {
+  case 8:
+    CALL(jit->GetAsmRoutines()->fifoDirectWrite8);
+    break;
+  case 16:
+    CALL(jit->GetAsmRoutines()->fifoDirectWrite16);
+    break;
+  case 32:
+    CALL(jit->GetAsmRoutines()->fifoDirectWrite32);
+    break;
+  case 64:
+    CALL(jit->GetAsmRoutines()->fifoDirectWrite64);
+    break;
+  }
+  jit->js.fifoBytesSinceCheck += accessSize >> 3;
+}
+
 // Visitor that generates code to read a MMIO value.
 template <typename T>
 class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>
@@ -212,37 +332,6 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
   }
 }
 
-FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr,
-                                             BitSet32 registers_in_use)
-{
-  registers_in_use[reg_addr] = true;
-  if (reg_value.IsSimpleReg())
-    registers_in_use[reg_value.GetSimpleReg()] = true;
-
-  // Get ourselves a free register; try to pick one that doesn't involve pushing, if we can.
-  X64Reg scratch = RSCRATCH;
-  if (!registers_in_use[RSCRATCH])
-    scratch = RSCRATCH;
-  else if (!registers_in_use[RSCRATCH_EXTRA])
-    scratch = RSCRATCH_EXTRA;
-  else
-    scratch = reg_addr;
-
-  if (scratch == reg_addr)
-    PUSH(scratch);
-  else
-    MOV(32, R(scratch), R(reg_addr));
-
-  // Perform lookup to see if we can use fast path.
-  SHR(32, R(scratch), Imm8(PowerPC::BAT_INDEX_SHIFT));
-  TEST(32, MScaled(scratch, SCALE_4, PtrOffset(&PowerPC::dbat_table[0])), Imm32(2));
-
-  if (scratch == reg_addr)
-    POP(scratch);
-
-  return J_CC(CC_Z, farcode.Enabled());
-}
-
 void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
                                  s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
 {
@@ -399,119 +488,6 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
   }
 }
 
-static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
-{
-  if (accessSize == 32)
-    return Imm32(Common::swap32(reg_value.Imm32()));
-  else if (accessSize == 16)
-    return Imm16(Common::swap16(reg_value.Imm16()));
-  else
-    return Imm8(reg_value.Imm8());
-}
-
-void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
-                                       bool swap, MovInfo* info)
-{
-  if (info)
-  {
-    info->address = GetWritableCodePtr();
-    info->nonAtomicSwapStore = false;
-  }
-
-  OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
-  if (reg_value.IsImm())
-  {
-    if (swap)
-      reg_value = SwapImmediate(accessSize, reg_value);
-    MOV(accessSize, dest, reg_value);
-  }
-  else if (swap)
-  {
-    SwapAndStore(accessSize, dest, reg_value.GetSimpleReg(), info);
-  }
-  else
-  {
-    MOV(accessSize, dest, reg_value);
-  }
-}
-
-static OpArg FixImmediate(int accessSize, OpArg arg)
-{
-  if (arg.IsImm())
-  {
-    arg = accessSize == 8 ? arg.AsImm8() : accessSize == 16 ? arg.AsImm16() : arg.AsImm32();
-  }
-  return arg;
-}
-
-void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize)
-{
-  // No need to protect these, they don't touch any state
-  // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
-  switch (accessSize)
-  {
-  case 8:
-    CALL(jit->GetAsmRoutines()->fifoDirectWrite8);
-    break;
-  case 16:
-    CALL(jit->GetAsmRoutines()->fifoDirectWrite16);
-    break;
-  case 32:
-    CALL(jit->GetAsmRoutines()->fifoDirectWrite32);
-    break;
-  case 64:
-    CALL(jit->GetAsmRoutines()->fifoDirectWrite64);
-    break;
-  }
-  jit->js.fifoBytesSinceCheck += accessSize >> 3;
-}
-
-bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
-                                       BitSet32 registersInUse)
-{
-  arg = FixImmediate(accessSize, arg);
-
-  // If we already know the address through constant folding, we can do some
-  // fun tricks...
-  if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(address))
-  {
-    if (!arg.IsSimpleReg(RSCRATCH))
-      MOV(accessSize, R(RSCRATCH), arg);
-
-    UnsafeWriteGatherPipe(accessSize);
-    return false;
-  }
-  else if (PowerPC::IsOptimizableRAMAddress(address))
-  {
-    WriteToConstRamAddress(accessSize, arg, address);
-    return false;
-  }
-  else
-  {
-    // Helps external systems know which instruction triggered the write
-    MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
-
-    ABI_PushRegistersAndAdjustStack(registersInUse, 0);
-    switch (accessSize)
-    {
-    case 64:
-      ABI_CallFunctionAC(64, PowerPC::Write_U64, arg, address);
-      break;
-    case 32:
-      ABI_CallFunctionAC(32, PowerPC::Write_U32, arg, address);
-      break;
-    case 16:
-      ABI_CallFunctionAC(16, PowerPC::Write_U16, arg, address);
-      break;
-    case 8:
-      ABI_CallFunctionAC(8, PowerPC::Write_U8, arg, address);
-      break;
-    }
-    ABI_PopRegistersAndAdjustStack(registersInUse, 0);
-    return true;
-  }
-}
-
 void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
                                      BitSet32 registersInUse, int flags)
 {
@@ -625,6 +601,63 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
   }
 }
 
+void EmuCodeBlock::SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
+                                     s32 offset, BitSet32 registersInUse, int flags)
+{
+  SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, registersInUse, flags);
+}
+
+bool EmuCodeBlock::WriteClobbersRegValue(int accessSize, bool swap)
+{
+  return swap && !cpu_info.bMOVBE && accessSize > 8;
+}
+
+bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
+                                       BitSet32 registersInUse)
+{
+  arg = FixImmediate(accessSize, arg);
+
+  // If we already know the address through constant folding, we can do some
+  // fun tricks...
+  if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(address))
+  {
+    if (!arg.IsSimpleReg(RSCRATCH))
+      MOV(accessSize, R(RSCRATCH), arg);
+
+    UnsafeWriteGatherPipe(accessSize);
+    return false;
+  }
+  else if (PowerPC::IsOptimizableRAMAddress(address))
+  {
+    WriteToConstRamAddress(accessSize, arg, address);
+    return false;
+  }
+  else
+  {
+    // Helps external systems know which instruction triggered the write
+    MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
+
+    ABI_PushRegistersAndAdjustStack(registersInUse, 0);
+    switch (accessSize)
+    {
+    case 64:
+      ABI_CallFunctionAC(64, PowerPC::Write_U64, arg, address);
+      break;
+    case 32:
+      ABI_CallFunctionAC(32, PowerPC::Write_U32, arg, address);
+      break;
+    case 16:
+      ABI_CallFunctionAC(16, PowerPC::Write_U16, arg, address);
+      break;
+    case 8:
+      ABI_CallFunctionAC(8, PowerPC::Write_U8, arg, address);
+      break;
+    }
+    ABI_PopRegistersAndAdjustStack(registersInUse, 0);
+    return true;
+  }
+}
+
 void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap)
 {
   X64Reg reg;
@@ -653,6 +686,30 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address
   MOV(accessSize, MRegSum(RMEM, RSCRATCH2), R(reg));
 }
 
+void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
+{
+  if (oe)
+    AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));  // XER.OV = 0
+  SHR(8, PPCSTATE(xer_ca), Imm8(1));                  // carry = XER.CA, XER.CA = 0
+}
+
+void EmuCodeBlock::JitSetCA()
+{
+  MOV(8, PPCSTATE(xer_ca), Imm8(1));  // XER.CA = 1
+}
+
+// Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so
+// branchless calculation of CA is probably faster in general.
+void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
+{
+  SETcc(conditionCode, PPCSTATE(xer_ca));
+}
+
+void EmuCodeBlock::JitClearCA()
+{
+  MOV(8, PPCSTATE(xer_ca), Imm8(0));
+}
+
 void EmuCodeBlock::ForceSinglePrecision(X64Reg output, const OpArg& input, bool packed,
                                         bool duplicate)
 {
@@ -1083,30 +1140,6 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
   OR(32, PPCSTATE(fpscr), R(RSCRATCH));
 }
 
-void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
-{
-  if (oe)
-    AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));  // XER.OV = 0
-  SHR(8, PPCSTATE(xer_ca), Imm8(1));                  // carry = XER.CA, XER.CA = 0
-}
-
-void EmuCodeBlock::JitSetCA()
-{
-  MOV(8, PPCSTATE(xer_ca), Imm8(1));  // XER.CA = 1
-}
-
-// Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so
-// branchless calculation of CA is probably faster in general.
-void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
-{
-  SETcc(conditionCode, PPCSTATE(xer_ca));
-}
-
-void EmuCodeBlock::JitClearCA()
-{
-  MOV(8, PPCSTATE(xer_ca), Imm8(0));
-}
-
 void EmuCodeBlock::Clear()
 {
   backPatchInfo.clear();
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64Util.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h
similarity index 59%
rename from Source/Core/Core/PowerPC/Jit64Common/Jit64Util.h
rename to Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h
index 15269c2d1a..066d6b446d 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64Util.h
+++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h
@@ -1,106 +1,23 @@
-// Copyright 2010 Dolphin Emulator Project
+// Copyright 2016 Dolphin Emulator Project
 // Licensed under GPLv2+
-// Refer to the license.txt file included./
+// Refer to the license.txt file included.
 
 #pragma once
 
 #include
 
 #include "Common/BitSet.h"
-#include "Common/CPUDetect.h"
 #include "Common/CommonTypes.h"
 #include "Common/x64Emitter.h"
-#include "Core/PowerPC/PowerPC.h"
+
+#include "Core/PowerPC/Jit64Common/FarCodeCache.h"
+#include "Core/PowerPC/Jit64Common/TrampolineInfo.h"
 
 namespace MMIO
 {
 class Mapping;
 }
 
-// We offset by 0x80 because the range of one byte memory offsets is
-// -0x80..0x7f.
-#define PPCSTATE(x) \
-  MDisp(RPPCSTATE, (int)((char*)&PowerPC::ppcState.x - (char*)&PowerPC::ppcState) - 0x80)
-// In case you want to disable the ppcstate register:
-// #define PPCSTATE(x) M(&PowerPC::ppcState.x)
-#define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
-#define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR])
-#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0])
-#define PPCSTATE_SRR1 PPCSTATE(spr[SPR_SRR1])
-
-// A place to throw blocks of code we don't want polluting the cache, e.g. rarely taken
-// exception branches.
-class FarCodeCache : public Gen::X64CodeBlock
-{
-private:
-  bool m_enabled = false;
-
-public:
-  bool Enabled() const { return m_enabled; }
-  void Init(int size)
-  {
-    AllocCodeSpace(size);
-    m_enabled = true;
-  }
-  void Shutdown()
-  {
-    FreeCodeSpace();
-    m_enabled = false;
-  }
-};
-
-constexpr int CODE_SIZE = 1024 * 1024 * 32;
-
-// a bit of a hack; the MMU results in a vast amount more code ending up in the far cache,
-// mostly exception handling, so give it a whole bunch more space if the MMU is on.
-constexpr int FARCODE_SIZE = 1024 * 1024 * 8;
-constexpr int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
-
-// same for the trampoline code cache, because fastmem results in far more backpatches in MMU mode
-constexpr int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8;
-constexpr int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
-
-// Stores information we need to batch-patch a MOV with a call to the slow read/write path after
-// it faults. There will be 10s of thousands of these structs live, so be wary of making this too
-// big.
-struct TrampolineInfo final
-{
-  // The start of the store operation that failed -- we will patch a JMP here
-  u8* start;
-
-  // The start + len = end of the store operation (points to the next instruction)
-  u32 len;
-
-  // The PPC PC for the current load/store block
-  u32 pc;
-
-  // Saved because we need these to make the ABI call in the trampoline
-  BitSet32 registersInUse;
-
-  // The MOV operation
-  Gen::X64Reg nonAtomicSwapStoreSrc;
-
-  // src/dest for load/store
-  s32 offset;
-  Gen::X64Reg op_reg;
-  Gen::OpArg op_arg;
-
-  // Original SafeLoadXXX/SafeStoreXXX flags
-  u8 flags;
-
-  // Memory access size (in bytes)
-  u8 accessSize : 4;
-
-  // true if this is a read op vs a write
-  bool read : 1;
-
-  // for read operations, true if needs sign-extension after load
-  bool signExtend : 1;
-
-  // Set to true if we added the offset to the address and need to undo it
-  bool offsetAddedToAddress : 1;
-};
-
 // Like XCodeBlock but has some utilities for memory access.
 class EmuCodeBlock : public Gen::X64CodeBlock
 {
@@ -111,17 +28,8 @@ public:
   void MemoryExceptionCheck();
 
   // Simple functions to switch between near and far code emitting
-  void SwitchToFarCode()
-  {
-    nearcode = GetWritableCodePtr();
-    SetCodePtr(farcode.GetWritableCodePtr());
-  }
-
-  void SwitchToNearCode()
-  {
-    farcode.SetCodePtr(GetWritableCodePtr());
-    SetCodePtr(nearcode);
-  }
+  void SwitchToFarCode();
+  void SwitchToNearCode();
 
   Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
                                       BitSet32 registers_in_use);
@@ -133,10 +41,8 @@ public:
   void UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
                            s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr);
   void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
-                           s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr)
-  {
-    UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap, info);
-  }
+                           s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr);
+
   bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
                        bool signExtend, Gen::MovInfo* info = nullptr);
   void UnsafeWriteGatherPipe(int accessSize);
@@ -169,20 +75,15 @@ public:
   void SafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset,
                          BitSet32 registersInUse, int flags = 0);
   void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset,
-                         BitSet32 registersInUse, int flags = 0)
-  {
-    SafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, registersInUse, flags);
-  }
+                         BitSet32 registersInUse, int flags = 0);
 
   // applies to safe and unsafe WriteRegToReg
-  bool WriteClobbersRegValue(int accessSize, bool swap)
-  {
-    return swap && !cpu_info.bMOVBE && accessSize > 8;
-  }
+  bool WriteClobbersRegValue(int accessSize, bool swap);
 
-  void WriteToConstRamAddress(int accessSize, Gen::OpArg arg, u32 address, bool swap = true);
   // returns true if an exception could have been caused
   bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, BitSet32 registersInUse);
+  void WriteToConstRamAddress(int accessSize, Gen::OpArg arg, u32 address, bool swap = true);
+
   void JitGetAndClearCAOV(bool oe);
   void JitSetCA();
   void JitSetCAIf(Gen::CCFlags conditionCode);
diff --git a/Source/Core/Core/PowerPC/Jit64Common/FarCodeCache.cpp b/Source/Core/Core/PowerPC/Jit64Common/FarCodeCache.cpp
new file mode 100644
index 0000000000..cd9d5a9462
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64Common/FarCodeCache.cpp
@@ -0,0 +1,22 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Core/PowerPC/Jit64Common/FarCodeCache.h"
+
+void FarCodeCache::Init(int size)
+{
+  AllocCodeSpace(size);
+  m_enabled = true;
+}
+
+void FarCodeCache::Shutdown()
+{
+  FreeCodeSpace();
+  m_enabled = false;
+}
+
+bool FarCodeCache::Enabled() const
+{
+  return m_enabled;
+}
diff --git a/Source/Core/Core/PowerPC/Jit64Common/FarCodeCache.h b/Source/Core/Core/PowerPC/Jit64Common/FarCodeCache.h
new file mode 100644
index 0000000000..2606de65fe
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64Common/FarCodeCache.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Common/x64Emitter.h"
+
+// a bit of a hack; the MMU results in a vast amount more code ending up in the far cache,
+// mostly exception handling, so give it a whole bunch more space if the MMU is on.
+constexpr int FARCODE_SIZE = 1024 * 1024 * 8;
+constexpr int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
+
+// A place to throw blocks of code we don't want polluting the cache, e.g. rarely taken
+// exception branches.
+class FarCodeCache : public Gen::X64CodeBlock
+{
+public:
+  void Init(int size);
+  void Shutdown();
+
+  bool Enabled() const;
+
+private:
+  bool m_enabled = false;
+};
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index e1ab7425ab..e9ec6cee8e 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -4,6 +4,7 @@
 #include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h"
 
 #include "Common/Assert.h"
+#include "Common/CPUDetect.h"
 #include "Common/CommonTypes.h"
 #include "Common/JitRegister.h"
 #include "Common/MathUtil.h"
@@ -12,7 +13,7 @@
 #include "Core/HW/GPFifo.h"
 #include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/Jit64Common/Jit64Base.h"
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
 #include "Core/PowerPC/PowerPC.h"
 
 #define QUANTIZED_REGS_TO_SAVE \
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h
index e4cdad983a..c098400f55 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h
@@ -5,7 +5,7 @@
 #pragma once
 
 #include "Common/CommonTypes.h"
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
+#include "Core/PowerPC/Jit64Common/EmuCodeBlock.h"
 #include "Core/PowerPC/JitCommon/JitAsmCommon.h"
 
 enum EQuantizeType : u32;
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64Base.h b/Source/Core/Core/PowerPC/Jit64Common/Jit64Base.h
index 8aaec2c6d7..b6259dd430 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64Base.h
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64Base.h
@@ -32,6 +32,8 @@ constexpr Gen::X64Reg RMEM = Gen::RBX;
 // to address as much as possible in a one-byte offset form.
 constexpr Gen::X64Reg RPPCSTATE = Gen::RBP;
 
+constexpr int CODE_SIZE = 1024 * 1024 * 32;
+
 class Jitx86Base : public JitBase, public QuantizedMemoryRoutines
 {
 protected:
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64PowerPCState.h b/Source/Core/Core/PowerPC/Jit64Common/Jit64PowerPCState.h
new file mode 100644
index 0000000000..48f2c039f5
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64PowerPCState.h
@@ -0,0 +1,19 @@
+// Copyright 2010 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included./
+
+#pragma once
+
+#include "Common/CommonTypes.h"
+#include "Core/PowerPC/PowerPC.h"
+
+// We offset by 0x80 because the range of one byte memory offsets is
+// -0x80..0x7f.
+#define PPCSTATE(x) \
+  MDisp(RPPCSTATE, (int)((char*)&PowerPC::ppcState.x - (char*)&PowerPC::ppcState) - 0x80)
+// In case you want to disable the ppcstate register:
+// #define PPCSTATE(x) M(&PowerPC::ppcState.x)
+#define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
+#define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR])
+#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0])
+#define PPCSTATE_SRR1 PPCSTATE(spr[SPR_SRR1])
diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp
index 67a2e4a9e5..954aac6fc5 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.cpp
@@ -12,7 +12,8 @@
 #include "Common/MsgHandler.h"
 #include "Common/x64Emitter.h"
 #include "Core/PowerPC/Jit64Common/Jit64Base.h"
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
+#include "Core/PowerPC/Jit64Common/TrampolineInfo.h"
 #include "Core/PowerPC/JitCommon/JitBase.h"
 #include "Core/PowerPC/PowerPC.h"
 
diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.h b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.h
index c9f2bcb41e..12ee83ec2d 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.h
+++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineCache.h
@@ -5,7 +5,14 @@
 #pragma once
 
 #include "Common/CommonTypes.h"
-#include "Core/PowerPC/Jit64Common/Jit64Util.h"
+#include "Core/PowerPC/Jit64Common/EmuCodeBlock.h"
+
+struct TrampolineInfo;
+
+// a bit of a hack; the MMU results in more code ending up in the trampoline cache,
+// because fastmem results in far more backpatches in MMU mode
+constexpr int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8;
+constexpr int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
 
 // We need at least this many bytes for backpatching.
 constexpr int BACKPATCH_SIZE = 5;
diff --git a/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h b/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h
new file mode 100644
index 0000000000..2ca6b2826a
--- /dev/null
+++ b/Source/Core/Core/PowerPC/Jit64Common/TrampolineInfo.h
@@ -0,0 +1,50 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Common/BitSet.h"
+#include "Common/CommonTypes.h"
+#include "Common/x64Emitter.h"
+
+// Stores information we need to batch-patch a MOV with a call to the slow read/write path after
+// it faults. There will be 10s of thousands of these structs live, so be wary of making this too
+// big.
+struct TrampolineInfo final
+{
+  // The start of the store operation that failed -- we will patch a JMP here
+  u8* start;
+
+  // The start + len = end of the store operation (points to the next instruction)
+  u32 len;
+
+  // The PPC PC for the current load/store block
+  u32 pc;
+
+  // Saved because we need these to make the ABI call in the trampoline
+  BitSet32 registersInUse;
+
+  // The MOV operation
+  Gen::X64Reg nonAtomicSwapStoreSrc;
+
+  // src/dest for load/store
+  s32 offset;
+  Gen::X64Reg op_reg;
+  Gen::OpArg op_arg;
+
+  // Original SafeLoadXXX/SafeStoreXXX flags
+  u8 flags;
+
+  // Memory access size (in bytes)
+  u8 accessSize : 4;
+
+  // true if this is a read op vs a write
+  bool read : 1;
+
+  // for read operations, true if needs sign-extension after load
+  bool signExtend : 1;
+
+  // Set to true if we added the offset to the address and need to undo it
+  bool offsetAddedToAddress : 1;
+};
diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
index 20b7919891..83eb202eb2 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
@@ -39,6 +39,7 @@ The register allocation is linear scan allocation.
 #include "Core/HW/CPU.h"
 #include "Core/HW/ProcessorInterface.h"
 #include "Core/PowerPC/Gekko.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
 #include "Core/PowerPC/Jit64IL/JitIL.h"
 #include "Core/PowerPC/PowerPC.h"
 
diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
index 62259cf579..0ec2375896 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
@@ -18,6 +18,7 @@
 #include "Core/HLE/HLE.h"
 #include "Core/HW/CPU.h"
 #include "Core/PatchEngine.h"
+#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
 #include "Core/PowerPC/Jit64IL/JitIL.h"
 #include "Core/PowerPC/Jit64IL/JitIL_Tables.h"
 #include "Core/PowerPC/PowerPC.h"
 
diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp
index ec92f1ed08..5524f9db24 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStorePaired.cpp
@@ -3,7 +3,9 @@
 // Refer to the license.txt file included.
 
 #include "Core/PowerPC/JitILCommon/JitILBase.h"
+
 #include "Common/CommonTypes.h"
+#include "Core/PowerPC/PowerPC.h"
 
 void JitILBase::psq_st(UGeckoInstruction inst)
 {
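
The snippets below are standalone C++ sketches, not part of the patch; they illustrate a few of the mechanisms this refactor moves around, and every name that does not appear in the diff above is a made-up stand-in.

The PPCSTATE macro that moves into Jit64PowerPCState.h biases every displacement by -0x80 because x86 one-byte displacements cover -0x80..0x7f: pointing RPPCSTATE 0x80 bytes past the start of ppcState doubles how much of the state block is reachable with a short encoding. A minimal sketch with a hypothetical guest-state struct, not Dolphin's real PowerPCState:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical guest-state block; only the layout idea matters.
struct GuestState
{
  uint32_t gpr[32];  // bytes 0x00..0x7f
  uint32_t pc;       // byte 0x80 -- already out of int8 range from &state
  uint32_t msr;      // byte 0x84
};

int main()
{
  GuestState state{};
  // Base register points 0x80 bytes into the struct, like RPPCSTATE does.
  const char* biased_base = reinterpret_cast<const char*>(&state) + 0x80;

  // Displacement the JIT would encode for a field: &field - biased_base.
  auto disp = [&](const void* field) {
    return static_cast<int>(reinterpret_cast<const char*>(field) - biased_base);
  };

  // Everything in the first 0x100 bytes of the struct now fits in -0x80..0x7f,
  // so instructions addressing it stay one byte shorter.
  std::printf("gpr[0]: disp %d\n", disp(&state.gpr[0]));  // -128
  std::printf("pc:     disp %d\n", disp(&state.pc));      // 0
  std::printf("msr:    disp %d\n", disp(&state.msr));     // 4
  return 0;
}
```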
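The relocated CheckIfSafeAddress emits a SHR by PowerPC::BAT_INDEX_SHIFT plus a TEST against bit 0x2 of PowerPC::dbat_table, and branches to far code when that bit is clear. Roughly the same check in plain C++; the shift value and table sizing below are assumptions, the diff itself only shows that bit 0x2 selects the fast path:

```cpp
#include <array>
#include <cstdint>

// Assumed values: the real constant is PowerPC::BAT_INDEX_SHIFT and the real
// table is PowerPC::dbat_table; 17 gives one entry per 128 KiB of address space.
constexpr uint32_t kBatIndexShift = 17;
std::array<uint32_t, 1u << (32 - kBatIndexShift)> dbat_table{};

// C-level equivalent of the emitted SHR + TEST: index the table with the high
// bits of the effective address and test the flag bit the JIT checks (0x2).
bool IsFastPathAddress(uint32_t effective_address)
{
  return (dbat_table[effective_address >> kBatIndexShift] & 0x2) != 0;
}

int main()
{
  dbat_table[0x80000000u >> kBatIndexShift] = 0x2;  // pretend this region is directly mapped
  const bool ok = IsFastPathAddress(0x80003100u) && !IsFastPathAddress(0xCC008000u);
  return ok ? 0 : 1;  // CC_Z in the emitted code corresponds to "not fast path" here
}
```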
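SwapImmediate, now file-local in EmuCodeBlock.cpp, byte-swaps an immediate at emit time so a single ordinary MOV leaves the bytes in guest (big-endian) order with no runtime BSWAP. The same idea in portable C++, using a local swap helper instead of Common::swap32 and assuming a little-endian host:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Local stand-in for Common::swap32.
static uint32_t Swap32(uint32_t value)
{
  return (value >> 24) | ((value >> 8) & 0x0000ff00u) | ((value << 8) & 0x00ff0000u) |
         (value << 24);
}

int main()
{
  uint8_t emulated_ram[4] = {};
  const uint32_t guest_value = 0x12345678;  // value the PowerPC code wants to store

  // Swap once at "emit time", then do one plain little-endian store...
  const uint32_t pre_swapped = Swap32(guest_value);
  std::memcpy(emulated_ram, &pre_swapped, sizeof(pre_swapped));

  // ...and the bytes land in big-endian (guest) order on a little-endian host.
  assert(emulated_ram[0] == 0x12 && emulated_ram[3] == 0x78);
  return 0;
}
```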
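SwitchToFarCode/SwitchToNearCode, now out-of-line, just retarget the emitter between the main buffer and the FarCodeCache so cold paths such as rarely taken exception branches stay out of the hot code. A toy sketch of that pattern using plain byte buffers; the real implementation swaps raw write pointers on the x64 emitter instead:

```cpp
#include <cstdint>
#include <vector>

// Toy "emitter": a near and a far buffer plus a current write target.
class ToyEmitter
{
public:
  void Emit(uint8_t byte) { current_->push_back(byte); }

  // Mirrors EmuCodeBlock::SwitchToFarCode()/SwitchToNearCode(): redirect
  // emission to the cold buffer, then come back to the hot one.
  void SwitchToFarCode() { current_ = &far_; }
  void SwitchToNearCode() { current_ = &near_; }

  const std::vector<uint8_t>& Near() const { return near_; }
  const std::vector<uint8_t>& Far() const { return far_; }

private:
  std::vector<uint8_t> near_, far_;
  std::vector<uint8_t>* current_ = &near_;
};

int main()
{
  ToyEmitter emitter;
  emitter.Emit(0x90);  // hot-path instruction stays in the near buffer
  emitter.SwitchToFarCode();
  emitter.Emit(0xCC);  // rarely-taken exception path goes to the far buffer
  emitter.SwitchToNearCode();
  emitter.Emit(0x90);  // back to emitting hot code
  return emitter.Near().size() == 2 && emitter.Far().size() == 1 ? 0 : 1;
}
```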
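TrampolineInfo.h keeps accessSize, read, signExtend and offsetAddedToAddress in bitfields because tens of thousands of these structs can be live at once. A rough sketch of the saving; the member types are simplified stand-ins, and the exact sizes are compiler- and ABI-dependent (on a typical x86-64 ABI the packed layout comes out one alignment step smaller):

```cpp
#include <cstdint>
#include <cstdio>

// Rough stand-ins for the real members (X64Reg/OpArg etc. are reduced to ints).
struct PackedInfo
{
  uint8_t* start;
  uint32_t len, pc, registers_in_use;
  int32_t offset;
  int32_t op_reg;
  uint8_t flags;
  uint8_t access_size : 4;  // 4 bits covers access sizes up to 15 bytes
  bool read : 1;
  bool sign_extend : 1;
  bool offset_added : 1;    // the flag bits share storage with access_size
};

struct UnpackedInfo
{
  uint8_t* start;
  uint32_t len, pc, registers_in_use;
  int32_t offset;
  int32_t op_reg;
  uint8_t flags;
  uint8_t access_size;
  bool read, sign_extend, offset_added;  // one byte each, plus struct padding
};

int main()
{
  // With tens of thousands of live entries, the padding difference multiplies out.
  std::printf("packed:   %zu bytes\n", sizeof(PackedInfo));
  std::printf("unpacked: %zu bytes\n", sizeof(UnpackedInfo));
  return 0;
}
```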
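JitGetAndClearCAOV leans on SHR r/m8, 1 doing two jobs at once: the stored 0/1 carry byte becomes 0 while the old bit lands in the host carry flag. The read-and-clear semantics, minus the host-flag side effect that only exists at machine level, look like this:

```cpp
#include <cassert>
#include <cstdint>

// xer_ca is stored as a single 0/1 byte in ppcState; this mimics that storage.
struct XerBits
{
  uint8_t ca = 0;
};

// Equivalent of the emitted SHR(8, PPCSTATE(xer_ca), Imm8(1)): return the old
// carry (what the SHR leaves in the host CF) and clear the stored byte.
static bool GetAndClearCarry(XerBits& xer)
{
  const bool old_carry = (xer.ca & 1) != 0;
  xer.ca >>= 1;  // a 0/1 value shifted right by one is always 0
  return old_carry;
}

int main()
{
  XerBits xer;
  xer.ca = 1;                      // JitSetCA() stores 1
  assert(GetAndClearCarry(xer));   // old CA was set...
  assert(xer.ca == 0);             // ...and it is now cleared
  assert(!GetAndClearCarry(xer));  // a second read sees it cleared
  return 0;
}
```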
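WriteToConstAddress, moved below SafeWriteRegToReg, picks one of three strategies for a constant-folded store: the gather-pipe fast path, a direct MOV into emulated RAM, or an ABI call into PowerPC::Write_UXX that may raise an exception (the only case where it returns true). A hedged sketch of that decision; the concrete address values below are illustrative assumptions, the real classification lives in PowerPC::IsOptimizableGatherPipeWrite and PowerPC::IsOptimizableRAMAddress:

```cpp
#include <cstdint>
#include <cstdio>

enum class ConstStoreKind
{
  GatherPipe,    // emit a fifoDirectWrite call; cannot fault
  PlainRam,      // emit a direct MOV into emulated RAM; cannot fault
  SlowFallback,  // ABI call into PowerPC::Write_UXX; may raise a memory exception
};

// Illustrative classification only.
ConstStoreKind ClassifyConstStore(uint32_t address, bool optimize_gather_pipe)
{
  constexpr uint32_t kAssumedGatherPipe = 0xCC008000;  // assumed gather-pipe target
  if (optimize_gather_pipe && address == kAssumedGatherPipe)
    return ConstStoreKind::GatherPipe;
  if (address >= 0x80000000u && address < 0x81800000u)  // assumed "always RAM" window
    return ConstStoreKind::PlainRam;
  return ConstStoreKind::SlowFallback;
}

int main()
{
  std::printf("%d\n", static_cast<int>(ClassifyConstStore(0xCC008000u, true)));  // 0: GatherPipe
  std::printf("%d\n", static_cast<int>(ClassifyConstStore(0x80003100u, true)));  // 1: PlainRam
  std::printf("%d\n", static_cast<int>(ClassifyConstStore(0xCC006400u, true)));  // 2: SlowFallback
  return 0;
}
```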