From 4feddd7748de682c8ac69415e8408d2552d60967 Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 18 Nov 2017 14:14:45 +0100 Subject: [PATCH 1/5] PowerPC: Include the gather pipe pointer in the ppc state. --- Source/Core/Core/HW/GPFifo.cpp | 24 +++++++++---------- Source/Core/Core/HW/GPFifo.h | 3 --- .../PowerPC/Jit64Common/Jit64AsmCommon.cpp | 5 ++-- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 3 +-- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 3 +-- Source/Core/Core/PowerPC/PowerPC.h | 3 +++ 6 files changed, 18 insertions(+), 23 deletions(-) diff --git a/Source/Core/Core/HW/GPFifo.cpp b/Source/Core/Core/HW/GPFifo.cpp index 403321fe37..f786af6eac 100644 --- a/Source/Core/Core/HW/GPFifo.cpp +++ b/Source/Core/Core/HW/GPFifo.cpp @@ -13,6 +13,7 @@ #include "Core/HW/Memmap.h" #include "Core/HW/ProcessorInterface.h" #include "Core/PowerPC/JitInterface.h" +#include "Core/PowerPC/PowerPC.h" #include "VideoCommon/CommandProcessor.h" namespace GPFifo @@ -31,17 +32,14 @@ namespace GPFifo // More room for the fastmodes alignas(32) static u8 s_gather_pipe[GATHER_PIPE_SIZE * 16]; -// pipe pointer -u8* g_gather_pipe_ptr = s_gather_pipe; - static size_t GetGatherPipeCount() { - return g_gather_pipe_ptr - s_gather_pipe; + return PowerPC::ppcState.gather_pipe_ptr - s_gather_pipe; } static void SetGatherPipeCount(size_t size) { - g_gather_pipe_ptr = s_gather_pipe + size; + PowerPC::ppcState.gather_pipe_ptr = s_gather_pipe + size; } void DoState(PointerWrap& p) @@ -144,29 +142,29 @@ void Write64(const u64 value) void FastWrite8(const u8 value) { - *g_gather_pipe_ptr = value; - g_gather_pipe_ptr += sizeof(u8); + *PowerPC::ppcState.gather_pipe_ptr = value; + PowerPC::ppcState.gather_pipe_ptr += sizeof(u8); } void FastWrite16(u16 value) { value = Common::swap16(value); - std::memcpy(g_gather_pipe_ptr, &value, sizeof(u16)); - g_gather_pipe_ptr += sizeof(u16); + std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u16)); + PowerPC::ppcState.gather_pipe_ptr += sizeof(u16); } void 
FastWrite32(u32 value) { value = Common::swap32(value); - std::memcpy(g_gather_pipe_ptr, &value, sizeof(u32)); - g_gather_pipe_ptr += sizeof(u32); + std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u32)); + PowerPC::ppcState.gather_pipe_ptr += sizeof(u32); } void FastWrite64(u64 value) { value = Common::swap64(value); - std::memcpy(g_gather_pipe_ptr, &value, sizeof(u64)); - g_gather_pipe_ptr += sizeof(u64); + std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u64)); + PowerPC::ppcState.gather_pipe_ptr += sizeof(u64); } } // end of namespace GPFifo diff --git a/Source/Core/Core/HW/GPFifo.h b/Source/Core/Core/HW/GPFifo.h index eb37c589b9..6fb331ef37 100644 --- a/Source/Core/Core/HW/GPFifo.h +++ b/Source/Core/Core/HW/GPFifo.h @@ -15,9 +15,6 @@ enum GATHER_PIPE_SIZE = 32 }; -// pipe pointer for JIT access -extern u8* g_gather_pipe_ptr; - // Init void Init(); void DoState(PointerWrap& p); diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 1d8ce07be4..fb9e5f3433 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -12,7 +12,6 @@ #include "Common/MathUtil.h" #include "Common/x64ABI.h" #include "Common/x64Emitter.h" -#include "Core/HW/GPFifo.h" #include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/Jit64Common/Jit64Base.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" @@ -30,10 +29,10 @@ void CommonAsmRoutines::GenFifoWrite(int size) const void* start = GetCodePtr(); // Assume value in RSCRATCH - MOV(64, R(RSCRATCH2), ImmPtr(&GPFifo::g_gather_pipe_ptr)); + MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::ppcState.gather_pipe_ptr)); MOV(64, R(RSCRATCH2), MatR(RSCRATCH2)); SwapAndStore(size, MatR(RSCRATCH2), RSCRATCH); - MOV(64, R(RSCRATCH), ImmPtr(&GPFifo::g_gather_pipe_ptr)); + MOV(64, R(RSCRATCH), ImmPtr(&PowerPC::ppcState.gather_pipe_ptr)); ADD(64, R(RSCRATCH2), Imm8(size >> 3)); MOV(64, 
MatR(RSCRATCH), R(RSCRATCH2)); RET(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index b4e057753e..96b03340f8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -10,7 +10,6 @@ #include "Core/Core.h" #include "Core/CoreTiming.h" #include "Core/HW/DSP.h" -#include "Core/HW/GPFifo.h" #include "Core/HW/MMIO.h" #include "Core/HW/Memmap.h" #include "Core/PowerPC/JitArm64/Jit.h" @@ -242,7 +241,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s if (accessSize != 8) WA = gpr.GetReg(); - MOVP2R(X1, &GPFifo::g_gather_pipe_ptr); + MOVP2R(X1, &PowerPC::ppcState.gather_pipe_ptr); LDR(INDEX_UNSIGNED, X0, X1, 0); if (accessSize == 32) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index e90f5f6f9c..12b8c43afc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -10,7 +10,6 @@ #include "Core/Core.h" #include "Core/CoreTiming.h" -#include "Core/HW/GPFifo.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/PPCTables.h" @@ -357,7 +356,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) else accessSize = 32; - MOVP2R(X1, &GPFifo::g_gather_pipe_ptr); + MOVP2R(X1, &PowerPC::ppcState.gather_pipe_ptr); LDR(INDEX_UNSIGNED, X0, X1, 0); if (flags & BackPatchInfo::FLAG_SIZE_F64) { diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 9e53c239b9..81248fb27e 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -93,6 +93,9 @@ struct PowerPCState // lscbx u16 xer_stringctrl; + // gather pipe pointer for JIT access + u8* gather_pipe_ptr; + #if _M_X86_64 // This 
member exists for the purpose of an assertion in x86 JitBase.cpp // that its offset <= 0x100. To minimize code size on x86, we want as much From 36ad887a19c0ef011c88a6d49ae380ac76e41500 Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 18 Nov 2017 14:45:09 +0100 Subject: [PATCH 2/5] Jit64: Inline GP writes. As we're down to 4 instructions now, it is always worth inlining those writes. --- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 8 ---- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 40 +++++++------------ .../Core/PowerPC/Jit64Common/EmuCodeBlock.h | 1 - .../PowerPC/Jit64Common/Jit64AsmCommon.cpp | 16 -------- .../Core/PowerPC/Jit64Common/Jit64AsmCommon.h | 1 - .../Core/PowerPC/JitCommon/JitAsmCommon.h | 5 --- 6 files changed, 15 insertions(+), 56 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 68434f1789..9c62303f50 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -222,14 +222,6 @@ void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter) void Jit64AsmRoutineManager::GenerateCommon() { - fifoDirectWrite8 = AlignCode4(); - GenFifoWrite(8); - fifoDirectWrite16 = AlignCode4(); - GenFifoWrite(16); - fifoDirectWrite32 = AlignCode4(); - GenFifoWrite(32); - fifoDirectWrite64 = AlignCode4(); - GenFifoWrite(64); frsqrte = AlignCode4(); GenFrsqrte(); fres = AlignCode4(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index a3054f7509..cdc91294a9 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -203,28 +203,6 @@ bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int access return offsetAddedToAddress; } -void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize) -{ - // No need to protect these, they don't touch any state - // question - should we inline them instead?
Pro: Lose a CALL Con: Code bloat - switch (accessSize) - { - case 8: - CALL(g_jit->GetAsmRoutines()->fifoDirectWrite8); - break; - case 16: - CALL(g_jit->GetAsmRoutines()->fifoDirectWrite16); - break; - case 32: - CALL(g_jit->GetAsmRoutines()->fifoDirectWrite32); - break; - case 64: - CALL(g_jit->GetAsmRoutines()->fifoDirectWrite64); - break; - } - g_jit->js.fifoBytesSinceCheck += accessSize >> 3; -} - // Visitor that generates code to read a MMIO value. template class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor @@ -622,10 +600,22 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, // fun tricks... if (g_jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(address)) { - if (!arg.IsSimpleReg(RSCRATCH)) - MOV(accessSize, R(RSCRATCH), arg); + X64Reg arg_reg = RSCRATCH; - UnsafeWriteGatherPipe(accessSize); + // With movbe, we can store inplace without temporary register + if (arg.IsSimpleReg() && cpu_info.bMOVBE) + arg_reg = arg.GetSimpleReg(); + + if (!arg.IsSimpleReg(arg_reg)) + MOV(accessSize, R(arg_reg), arg); + + // And store it in the gather pipe + MOV(64, R(RSCRATCH2), PPCSTATE(gather_pipe_ptr)); + SwapAndStore(accessSize, MatR(RSCRATCH2), arg_reg); + ADD(64, R(RSCRATCH2), Imm8(accessSize >> 3)); + MOV(64, PPCSTATE(gather_pipe_ptr), R(RSCRATCH2)); + + g_jit->js.fifoBytesSinceCheck += accessSize >> 3; return false; } else if (PowerPC::IsOptimizableRAMAddress(address)) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 20e44d0bc0..acf10f80a5 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -61,7 +61,6 @@ public: bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend, Gen::MovInfo* info = nullptr); - void UnsafeWriteGatherPipe(int accessSize); // Generate a load/write from the MMIO handler for a 
given address. Only // call for known addresses in MMIO range (MMIO::IsMMIOAddress). diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index fb9e5f3433..afdd2b1a83 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -24,22 +24,6 @@ using namespace Gen; -void CommonAsmRoutines::GenFifoWrite(int size) -{ - const void* start = GetCodePtr(); - - // Assume value in RSCRATCH - MOV(64, R(RSCRATCH2), ImmPtr(&PowerPC::ppcState.gather_pipe_ptr)); - MOV(64, R(RSCRATCH2), MatR(RSCRATCH2)); - SwapAndStore(size, MatR(RSCRATCH2), RSCRATCH); - MOV(64, R(RSCRATCH), ImmPtr(&PowerPC::ppcState.gather_pipe_ptr)); - ADD(64, R(RSCRATCH2), Imm8(size >> 3)); - MOV(64, MatR(RSCRATCH), R(RSCRATCH2)); - RET(); - - JitRegister::Register(start, GetCodePtr(), "JIT_FifoWrite_%i", size); -} - void CommonAsmRoutines::GenFrsqrte() { const void* start = GetCodePtr(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h index 09087f94b1..533330264a 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h @@ -24,7 +24,6 @@ private: class CommonAsmRoutines : public CommonAsmRoutinesBase, public QuantizedMemoryRoutines { public: - void GenFifoWrite(int size); void GenFrsqrte(); void GenFres(); void GenMfcr(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index 3322dccade..fbcdee675f 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -15,11 +15,6 @@ alignas(16) extern const float m_dequantizeTableS[128]; class CommonAsmRoutinesBase { public: - const u8* fifoDirectWrite8; - const u8* fifoDirectWrite16; - const u8* fifoDirectWrite32; - const u8* fifoDirectWrite64; - const u8* 
enterCode; const u8* dispatcherMispredictedBLR; From 6ad63c1b350221ef1bc8f20ba62338845d9dd0a0 Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 18 Nov 2017 15:05:27 +0100 Subject: [PATCH 3/5] Jit64: Inline GP fast check in Cleanup. --- Source/Core/Core/HW/GPFifo.cpp | 3 ++- Source/Core/Core/HW/GPFifo.h | 1 + Source/Core/Core/PowerPC/Jit64/Jit.cpp | 7 ++++++- Source/Core/Core/PowerPC/PowerPC.h | 1 + 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/HW/GPFifo.cpp b/Source/Core/Core/HW/GPFifo.cpp index f786af6eac..d21f698c02 100644 --- a/Source/Core/Core/HW/GPFifo.cpp +++ b/Source/Core/Core/HW/GPFifo.cpp @@ -53,6 +53,7 @@ void DoState(PointerWrap& p) void Init() { ResetGatherPipe(); + PowerPC::ppcState.gather_pipe_base_ptr = s_gather_pipe; memset(s_gather_pipe, 0, sizeof(s_gather_pipe)); } @@ -66,7 +67,7 @@ void ResetGatherPipe() SetGatherPipeCount(0); } -static void UpdateGatherPipe() +void UpdateGatherPipe() { size_t pipe_count = GetGatherPipeCount(); size_t processed; diff --git a/Source/Core/Core/HW/GPFifo.h b/Source/Core/Core/HW/GPFifo.h index 6fb331ef37..d5779d73ea 100644 --- a/Source/Core/Core/HW/GPFifo.h +++ b/Source/Core/Core/HW/GPFifo.h @@ -21,6 +21,7 @@ void DoState(PointerWrap& p); // ResetGatherPipe void ResetGatherPipe(); +void UpdateGatherPipe(); void CheckGatherPipe(); void FastCheckGatherPipe(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 1a8cd83972..facfe820e5 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -355,9 +355,14 @@ bool Jit64::Cleanup() if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0) { + MOV(64, R(RSCRATCH), PPCSTATE(gather_pipe_ptr)); + SUB(64, R(RSCRATCH), PPCSTATE(gather_pipe_base_ptr)); + CMP(64, R(RSCRATCH), Imm32(GPFifo::GATHER_PIPE_SIZE)); + FixupBranch exit = J_CC(CC_L); ABI_PushRegistersAndAdjustStack({}, 0); - ABI_CallFunction(GPFifo::FastCheckGatherPipe); + 
ABI_CallFunction(GPFifo::UpdateGatherPipe); ABI_PopRegistersAndAdjustStack({}, 0); + SetJumpTarget(exit); did_something = true; } diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 81248fb27e..2b3fe8a98e 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -95,6 +95,7 @@ struct PowerPCState // gather pipe pointer for JIT access u8* gather_pipe_ptr; + u8* gather_pipe_base_ptr; #if _M_X86_64 // This member exists for the purpose of an assertion in x86 JitBase.cpp From 6c9bb67ca0143f1bdc6dd12981e3be7c70b8189e Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 18 Nov 2017 17:14:32 +0100 Subject: [PATCH 4/5] JitArm64: Optimize gather pipe writes. --- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 20 ++++++------------- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 5 ++--- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 96b03340f8..2311056ab6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -229,7 +229,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s if (is_immediate && jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)) { - ARM64Reg WA = INVALID_REG; int accessSize; if (flags & BackPatchInfo::FLAG_SIZE_32) accessSize = 32; @@ -238,30 +237,23 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s else accessSize = 8; - if (accessSize != 8) - WA = gpr.GetReg(); - - MOVP2R(X1, &PowerPC::ppcState.gather_pipe_ptr); - LDR(INDEX_UNSIGNED, X0, X1, 0); + LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); if (accessSize == 32) { - REV32(WA, RS); - STR(INDEX_POST, WA, X0, 4); + REV32(W1, RS); + STR(INDEX_POST, W1, X0, 4); } else if (accessSize == 16) { - REV16(WA, RS); - 
STRH(INDEX_POST, WA, X0, 2); + REV16(W1, RS); + STRH(INDEX_POST, W1, X0, 2); } else { STRB(INDEX_POST, RS, X0, 1); } - STR(INDEX_UNSIGNED, X0, X1, 0); + STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); js.fifoBytesSinceCheck += accessSize >> 3; - - if (accessSize != 8) - gpr.Unlock(WA); } else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 12b8c43afc..1c307e889f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -356,8 +356,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) else accessSize = 32; - MOVP2R(X1, &PowerPC::ppcState.gather_pipe_ptr); - LDR(INDEX_UNSIGNED, X0, X1, 0); + LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); if (flags & BackPatchInfo::FLAG_SIZE_F64) { m_float_emit.REV64(8, Q0, V0); @@ -374,7 +373,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) m_float_emit.STR(accessSize, INDEX_POST, accessSize == 64 ? Q0 : D0, X0, accessSize >> 3); - STR(INDEX_UNSIGNED, X0, X1, 0); + STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); js.fifoBytesSinceCheck += accessSize >> 3; if (update) From 6ea3f538b40ee9a378366f0c35c02bddfbfb7f62 Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 18 Nov 2017 17:42:11 +0100 Subject: [PATCH 5/5] JitArm64: Inline GP check in Cleanup. We're calling this function up to 2M times per second. Let's inline the pre-check. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 4b27c13f00..4452d3d1e4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -231,8 +231,13 @@ void JitArm64::Cleanup() { if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0) { - MOVP2R(X0, &GPFifo::FastCheckGatherPipe); + LDP(INDEX_SIGNED, X0, X1, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); + SUB(X0, X0, X1); + CMP(X0, GPFifo::GATHER_PIPE_SIZE); + FixupBranch exit = B(CC_LT); + MOVP2R(X0, &GPFifo::UpdateGatherPipe); BLR(X0); + SetJumpTarget(exit); } }