From bbc0f0c744a8a7d90c3801192446ba1b3bd92584 Mon Sep 17 00:00:00 2001 From: hthh Date: Mon, 13 Jun 2016 17:25:24 +1000 Subject: [PATCH 1/2] Jit: Check the FIFO on EIEIO instructions The gather pipe optimization postpones checking the FIFO until the end of the current block (or 32 bytes have been written). This is usually safe, but is not correct across EIEIO instructions. This is inferred from a block in NBA2K11 which synchronizes the FIFO by writing a byte to it, executing eieio, and checking if PI_FIFO_WPTR has changed. This is not currently an issue, but will become an issue if the gather pipe optimization is applied to more stores. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 7 +++++-- Source/Core/Core/PowerPC/Jit64/Jit.h | 2 ++ Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp | 2 +- Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 12 ++++++++++++ Source/Core/Core/PowerPC/JitCommon/JitBase.h | 1 + 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index a1ef3c409c..ba5975ab24 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -598,6 +598,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc js.isLastInstruction = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; + js.mustCheckFifo = false; js.curBlock = b; js.numLoadStoreInst = 0; js.numFloatingPointInst = 0; @@ -723,9 +724,11 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc js.fifoWriteAddresses.find(ops[i].address) != js.fifoWriteAddresses.end(); // Gather pipe writes using an immediate address are explicitly tracked. - if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) + if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo)) { - js.fifoBytesThisBlock -= 32; + if (js.fifoBytesThisBlock >= 32) + js.fifoBytesThisBlock -= 32; + js.mustCheckFifo = false; BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunction((void*)&GPFifo::FastCheckGatherPipe); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index b00b63845b..4fce2e91bf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -241,4 +241,6 @@ public: void stmw(UGeckoInstruction inst); void dcbx(UGeckoInstruction inst); + + void eieio(UGeckoInstruction inst); }; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 75df5bf07e..700e87c3ae 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -309,7 +309,7 @@ static GekkoOPTemplate table31[] = { // Unused instructions on GC {310, &Jit64::FallBackToInterpreter}, // eciwx {438, &Jit64::FallBackToInterpreter}, // ecowx - {854, &Jit64::DoNothing}, // eieio + {854, &Jit64::eieio}, // eieio {306, &Jit64::FallBackToInterpreter}, // tlbie {566, &Jit64::DoNothing}, // tlbsync }; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index a67dad0274..2059587dd6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -598,3 +598,15 @@ void Jit64::stmw(UGeckoInstruction inst) } gpr.UnlockAllX(); } + +void Jit64::eieio(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITLoadStoreOff); + + // optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block, + // which is generally safe. However postponing FIFO writes across eieio instructions + // is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection). + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) + js.mustCheckFifo = true; +} diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 7b96a6b739..489bb5ba38 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -103,6 +103,7 @@ protected: bool generatingTrampoline = false; u8* trampolineExceptionHandler; + bool mustCheckFifo; int fifoBytesThisBlock; PPCAnalyst::BlockStats st; From d841d9c7b3c2b0378dae2d4aa0a67ac0a6c204a8 Mon Sep 17 00:00:00 2001 From: hthh Date: Fri, 12 Aug 2016 21:09:15 +1000 Subject: [PATCH 2/2] JitArm64: Check the FIFO on EIEIO instructions Copied from the Jit64 version --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 7 +++++-- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 + .../Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp | 12 ++++++++++++ .../Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index d615248083..e19343393a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -405,6 +405,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB js.assumeNoPairedQuantize = false; js.blockStart = em_address; js.fifoBytesThisBlock = 0; + js.mustCheckFifo = false; js.downcountAmount = 0; js.skipInstructions = 0; js.curBlock = b; @@ -491,9 +492,11 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB bool gatherPipeIntCheck = jit->js.fifoWriteAddresses.find(ops[i].address) != jit->js.fifoWriteAddresses.end(); - if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) + if (jo.optimizeGatherPipe && (js.fifoBytesThisBlock >= 32 || js.mustCheckFifo)) { - js.fifoBytesThisBlock -= 32; + if (js.fifoBytesThisBlock >= 32) + js.fifoBytesThisBlock -= 32; + js.mustCheckFifo = false; gpr.Lock(W30); BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 00181d916d..c2832ff460 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -116,6 +116,7 @@ public: void dcbx(UGeckoInstruction inst); void dcbt(UGeckoInstruction inst); void dcbz(UGeckoInstruction inst); + void eieio(UGeckoInstruction inst); // LoadStore floating point void lfXX(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 1876ff454e..34a8638024 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -853,3 +853,15 @@ void JitArm64::dcbz(UGeckoInstruction inst) gpr.Unlock(W0); } + +void JitArm64::eieio(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITLoadStoreOff); + + // optimizeGatherPipe generally postpones FIFO checks to the end of the JIT block, + // which is generally safe. However postponing FIFO writes across eieio instructions + // is incorrect (would crash NBA2K11 strap screen if we improve our FIFO detection). + if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) + js.mustCheckFifo = true; +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 18d05b4abf..723cf91aa6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -314,7 +314,7 @@ static GekkoOPTemplate table31[] = { // Unused instructions on GC {310, &JitArm64::FallBackToInterpreter}, // eciwx {438, &JitArm64::FallBackToInterpreter}, // ecowx - {854, &JitArm64::DoNothing}, // eieio + {854, &JitArm64::eieio}, // eieio {306, &JitArm64::FallBackToInterpreter}, // tlbie {566, &JitArm64::DoNothing}, // tlbsync };