diff --git a/Source/Core/Core/HW/GPFifo.cpp b/Source/Core/Core/HW/GPFifo.cpp index 403321fe37..d21f698c02 100644 --- a/Source/Core/Core/HW/GPFifo.cpp +++ b/Source/Core/Core/HW/GPFifo.cpp @@ -13,6 +13,7 @@ #include "Core/HW/Memmap.h" #include "Core/HW/ProcessorInterface.h" #include "Core/PowerPC/JitInterface.h" +#include "Core/PowerPC/PowerPC.h" #include "VideoCommon/CommandProcessor.h" namespace GPFifo @@ -31,17 +32,14 @@ namespace GPFifo // More room for the fastmodes alignas(32) static u8 s_gather_pipe[GATHER_PIPE_SIZE * 16]; -// pipe pointer -u8* g_gather_pipe_ptr = s_gather_pipe; - static size_t GetGatherPipeCount() { - return g_gather_pipe_ptr - s_gather_pipe; + return PowerPC::ppcState.gather_pipe_ptr - s_gather_pipe; } static void SetGatherPipeCount(size_t size) { - g_gather_pipe_ptr = s_gather_pipe + size; + PowerPC::ppcState.gather_pipe_ptr = s_gather_pipe + size; } void DoState(PointerWrap& p) @@ -55,6 +53,7 @@ void DoState(PointerWrap& p) void Init() { ResetGatherPipe(); + PowerPC::ppcState.gather_pipe_base_ptr = s_gather_pipe; memset(s_gather_pipe, 0, sizeof(s_gather_pipe)); } @@ -68,7 +67,7 @@ void ResetGatherPipe() SetGatherPipeCount(0); } -static void UpdateGatherPipe() +void UpdateGatherPipe() { size_t pipe_count = GetGatherPipeCount(); size_t processed; @@ -144,29 +143,29 @@ void Write64(const u64 value) void FastWrite8(const u8 value) { - *g_gather_pipe_ptr = value; - g_gather_pipe_ptr += sizeof(u8); + *PowerPC::ppcState.gather_pipe_ptr = value; + PowerPC::ppcState.gather_pipe_ptr += sizeof(u8); } void FastWrite16(u16 value) { value = Common::swap16(value); - std::memcpy(g_gather_pipe_ptr, &value, sizeof(u16)); - g_gather_pipe_ptr += sizeof(u16); + std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u16)); + PowerPC::ppcState.gather_pipe_ptr += sizeof(u16); } void FastWrite32(u32 value) { value = Common::swap32(value); - std::memcpy(g_gather_pipe_ptr, &value, sizeof(u32)); - g_gather_pipe_ptr += sizeof(u32); + std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u32)); + PowerPC::ppcState.gather_pipe_ptr += sizeof(u32); } void FastWrite64(u64 value) { value = Common::swap64(value); - std::memcpy(g_gather_pipe_ptr, &value, sizeof(u64)); - g_gather_pipe_ptr += sizeof(u64); + std::memcpy(PowerPC::ppcState.gather_pipe_ptr, &value, sizeof(u64)); + PowerPC::ppcState.gather_pipe_ptr += sizeof(u64); } } // end of namespace GPFifo diff --git a/Source/Core/Core/HW/GPFifo.h b/Source/Core/Core/HW/GPFifo.h index eb37c589b9..d5779d73ea 100644 --- a/Source/Core/Core/HW/GPFifo.h +++ b/Source/Core/Core/HW/GPFifo.h @@ -15,15 +15,13 @@ enum GATHER_PIPE_SIZE = 32 }; -// pipe pointer for JIT access -extern u8* g_gather_pipe_ptr; - // Init void Init(); void DoState(PointerWrap& p); // ResetGatherPipe void ResetGatherPipe(); +void UpdateGatherPipe(); void CheckGatherPipe(); void FastCheckGatherPipe(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 1a8cd83972..facfe820e5 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -355,9 +355,14 @@ bool Jit64::Cleanup() if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0) { + MOV(64, R(RSCRATCH), PPCSTATE(gather_pipe_ptr)); + SUB(64, R(RSCRATCH), PPCSTATE(gather_pipe_base_ptr)); + CMP(64, R(RSCRATCH), Imm32(GPFifo::GATHER_PIPE_SIZE)); + FixupBranch exit = J_CC(CC_L); ABI_PushRegistersAndAdjustStack({}, 0); - ABI_CallFunction(GPFifo::FastCheckGatherPipe); + ABI_CallFunction(GPFifo::UpdateGatherPipe); ABI_PopRegistersAndAdjustStack({}, 0); + SetJumpTarget(exit); did_something = true; } diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 68434f1789..9c62303f50 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -222,14 +222,6 @@ void Jit64AsmRoutineManager::ResetStack(X64CodeBlock& emitter) void Jit64AsmRoutineManager::GenerateCommon() { - fifoDirectWrite8 = AlignCode4(); - GenFifoWrite(8); - fifoDirectWrite16 = AlignCode4(); - GenFifoWrite(16); - fifoDirectWrite32 = AlignCode4(); - GenFifoWrite(32); - fifoDirectWrite64 = AlignCode4(); - GenFifoWrite(64); frsqrte = AlignCode4(); GenFrsqrte(); fres = AlignCode4(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index a3054f7509..cdc91294a9 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -203,28 +203,6 @@ bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int access return offsetAddedToAddress; } -void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize) -{ - // No need to protect these, they don't touch any state - // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat - switch (accessSize) - { - case 8: - CALL(g_jit->GetAsmRoutines()->fifoDirectWrite8); - break; - case 16: - CALL(g_jit->GetAsmRoutines()->fifoDirectWrite16); - break; - case 32: - CALL(g_jit->GetAsmRoutines()->fifoDirectWrite32); - break; - case 64: - CALL(g_jit->GetAsmRoutines()->fifoDirectWrite64); - break; - } - g_jit->js.fifoBytesSinceCheck += accessSize >> 3; -} - // Visitor that generates code to read a MMIO value. template class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor @@ -622,10 +600,22 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, // fun tricks... if (g_jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(address)) { - if (!arg.IsSimpleReg(RSCRATCH)) - MOV(accessSize, R(RSCRATCH), arg); + X64Reg arg_reg = RSCRATCH; - UnsafeWriteGatherPipe(accessSize); + // With movbe, we can store inplace without temporary register + if (arg.IsSimpleReg() && cpu_info.bMOVBE) + arg_reg = arg.GetSimpleReg(); + + if (!arg.IsSimpleReg(arg_reg)) + MOV(accessSize, R(arg_reg), arg); + + // And store it in the gather pipe + MOV(64, R(RSCRATCH2), PPCSTATE(gather_pipe_ptr)); + SwapAndStore(accessSize, MatR(RSCRATCH2), arg_reg); + ADD(64, R(RSCRATCH2), Imm8(accessSize >> 3)); + MOV(64, PPCSTATE(gather_pipe_ptr), R(RSCRATCH2)); + + g_jit->js.fifoBytesSinceCheck += accessSize >> 3; return false; } else if (PowerPC::IsOptimizableRAMAddress(address)) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index 20e44d0bc0..acf10f80a5 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -61,7 +61,6 @@ public: bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend, Gen::MovInfo* info = nullptr); - void UnsafeWriteGatherPipe(int accessSize); // Generate a load/write from the MMIO handler for a given address. Only // call for known addresses in MMIO range (MMIO::IsMMIOAddress). diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 1d8ce07be4..afdd2b1a83 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -12,7 +12,6 @@ #include "Common/MathUtil.h" #include "Common/x64ABI.h" #include "Common/x64Emitter.h" -#include "Core/HW/GPFifo.h" #include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/Jit64Common/Jit64Base.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" @@ -25,22 +24,6 @@ using namespace Gen; -void CommonAsmRoutines::GenFifoWrite(int size) -{ - const void* start = GetCodePtr(); - - // Assume value in RSCRATCH - MOV(64, R(RSCRATCH2), ImmPtr(&GPFifo::g_gather_pipe_ptr)); - MOV(64, R(RSCRATCH2), MatR(RSCRATCH2)); - SwapAndStore(size, MatR(RSCRATCH2), RSCRATCH); - MOV(64, R(RSCRATCH), ImmPtr(&GPFifo::g_gather_pipe_ptr)); - ADD(64, R(RSCRATCH2), Imm8(size >> 3)); - MOV(64, MatR(RSCRATCH), R(RSCRATCH2)); - RET(); - - JitRegister::Register(start, GetCodePtr(), "JIT_FifoWrite_%i", size); -} - void CommonAsmRoutines::GenFrsqrte() { const void* start = GetCodePtr(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h index 09087f94b1..533330264a 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h @@ -24,7 +24,6 @@ private: class CommonAsmRoutines : public CommonAsmRoutinesBase, public QuantizedMemoryRoutines { public: - void GenFifoWrite(int size); void GenFrsqrte(); void GenFres(); void GenMfcr(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 4b27c13f00..4452d3d1e4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -231,8 +231,13 @@ void JitArm64::Cleanup() { if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0) { - MOVP2R(X0, &GPFifo::FastCheckGatherPipe); + LDP(INDEX_SIGNED, X0, X1, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); + SUB(X0, X0, X1); + CMP(X0, GPFifo::GATHER_PIPE_SIZE); + FixupBranch exit = B(CC_LT); + MOVP2R(X0, &GPFifo::UpdateGatherPipe); BLR(X0); + SetJumpTarget(exit); } } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index b4e057753e..2311056ab6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -10,7 +10,6 @@ #include "Core/Core.h" #include "Core/CoreTiming.h" #include "Core/HW/DSP.h" -#include "Core/HW/GPFifo.h" #include "Core/HW/MMIO.h" #include "Core/HW/Memmap.h" #include "Core/PowerPC/JitArm64/Jit.h" @@ -230,7 +229,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s if (is_immediate && jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)) { - ARM64Reg WA = INVALID_REG; int accessSize; if (flags & BackPatchInfo::FLAG_SIZE_32) accessSize = 32; @@ -239,30 +237,23 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s else accessSize = 8; - if (accessSize != 8) - WA = gpr.GetReg(); - - MOVP2R(X1, &GPFifo::g_gather_pipe_ptr); - LDR(INDEX_UNSIGNED, X0, X1, 0); + LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); if (accessSize == 32) { - REV32(WA, RS); - STR(INDEX_POST, WA, X0, 4); + REV32(W1, RS); + STR(INDEX_POST, W1, X0, 4); } else if (accessSize == 16) { - REV16(WA, RS); - STRH(INDEX_POST, WA, X0, 2); + REV16(W1, RS); + STRH(INDEX_POST, W1, X0, 2); } else { STRB(INDEX_POST, RS, X0, 1); } - STR(INDEX_UNSIGNED, X0, X1, 0); + STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); js.fifoBytesSinceCheck += accessSize >> 3; - - if (accessSize != 8) - gpr.Unlock(WA); } else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index e90f5f6f9c..1c307e889f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -10,7 +10,6 @@ #include "Core/Core.h" #include "Core/CoreTiming.h" -#include "Core/HW/GPFifo.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/PPCTables.h" @@ -357,8 +356,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) else accessSize = 32; - MOVP2R(X1, &GPFifo::g_gather_pipe_ptr); - LDR(INDEX_UNSIGNED, X0, X1, 0); + LDR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); if (flags & BackPatchInfo::FLAG_SIZE_F64) { m_float_emit.REV64(8, Q0, V0); @@ -375,7 +373,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) m_float_emit.STR(accessSize, INDEX_POST, accessSize == 64 ? Q0 : D0, X0, accessSize >> 3); - STR(INDEX_UNSIGNED, X0, X1, 0); + STR(INDEX_UNSIGNED, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); js.fifoBytesSinceCheck += accessSize >> 3; if (update) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index 3322dccade..fbcdee675f 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -15,11 +15,6 @@ alignas(16) extern const float m_dequantizeTableS[128]; class CommonAsmRoutinesBase { public: - const u8* fifoDirectWrite8; - const u8* fifoDirectWrite16; - const u8* fifoDirectWrite32; - const u8* fifoDirectWrite64; - const u8* enterCode; const u8* dispatcherMispredictedBLR; diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 9e53c239b9..2b3fe8a98e 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -93,6 +93,10 @@ struct PowerPCState // lscbx u16 xer_stringctrl; + // gather pipe pointer for JIT access + u8* gather_pipe_ptr; + u8* gather_pipe_base_ptr; + #if _M_X86_64 // This member exists for the purpose of an assertion in x86 JitBase.cpp // that its offset <= 0x100. To minimize code size on x86, we want as much