From 9e398fd418027e4932a1539f7d382b70e8acc408 Mon Sep 17 00:00:00 2001 From: skidau Date: Fri, 2 Mar 2012 18:53:41 +1100 Subject: [PATCH] Added an external exception check when the CPU writes to the FIFO. This allows the CPU time to service FIFO overflows. Fixes random hangs caused by FIFO overflows and desyncs like in "The Last Story" and "Battalion Wars 2". Thanks to marcosvitali for the research. Fixes issue 5209. Fixes issue 5150. Fixes issue 5055. Fixes issue 4889. Fixes issue 4061. Fixes issue 4010. Fixes issue 3902. --- Source/Core/Core/Src/HW/GPFifo.cpp | 14 ++++++++++-- .../Interpreter/Interpreter_LoadStore.cpp | 4 ++-- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 19 ++++++++++++++++ Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp | 6 ++--- Source/Core/Core/Src/PowerPC/Jit64IL/IR.h | 7 ++++-- .../Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp | 17 ++++++++++++++ .../Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp | 5 +++++ .../Core/Core/Src/PowerPC/JitCommon/JitBase.h | 4 ++++ .../Core/Src/PowerPC/JitCommon/JitCache.cpp | 11 +++++----- .../Core/Src/PowerPC/JitCommon/JitCache.h | 2 +- Source/Core/Core/Src/PowerPC/PPCCache.cpp | 2 +- .../Core/VideoCommon/Src/CommandProcessor.cpp | 22 ------------------- .../Core/VideoCommon/Src/CommandProcessor.h | 1 - Source/Core/VideoCommon/Src/Fifo.cpp | 3 +-- 14 files changed, 75 insertions(+), 42 deletions(-) diff --git a/Source/Core/Core/Src/HW/GPFifo.cpp b/Source/Core/Core/Src/HW/GPFifo.cpp index 4d72317e0d..35d678a411 100644 --- a/Source/Core/Core/Src/HW/GPFifo.cpp +++ b/Source/Core/Core/Src/HW/GPFifo.cpp @@ -19,9 +19,9 @@ #include "ChunkFile.h" #include "ProcessorInterface.h" #include "Memmap.h" -#include "../PowerPC/PowerPC.h" - #include "VideoBackendBase.h" +#include "../PowerPC/JitCommon/JitBase.h" +#include "../PowerPC/PowerPC.h" #include "GPFifo.h" @@ -96,6 +96,16 @@ void STACKALIGN CheckGatherPipe() // move back the spill bytes memmove(m_gatherPipe, m_gatherPipe + cnt, m_gatherPipeCount); + + // Profile where the FIFO writes are occurring. + const u32 addr = PC - 4; + if (jit && (jit->js.fifoWriteAddresses.find(addr)) == (jit->js.fifoWriteAddresses.end())) + { + jit->js.fifoWriteAddresses.insert(addr); + + // Invalidate the JIT block so that it gets recompiled with the external exception check included. + jit->GetBlockCache()->InvalidateICache(addr, 8); + } } } diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp index a7955c0982..cad4761e55 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -367,7 +367,7 @@ void Interpreter::dcbf(UGeckoInstruction _inst) if (jit) { u32 address = Helper_Get_EA_X(_inst); - jit->GetBlockCache()->InvalidateICache(address & ~0x1f); + jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32); } } @@ -378,7 +378,7 @@ void Interpreter::dcbi(UGeckoInstruction _inst) if (jit) { u32 address = Helper_Get_EA_X(_inst); - jit->GetBlockCache()->InvalidateICache(address & ~0x1f); + jit->GetBlockCache()->InvalidateICache(address & ~0x1f, 32); } } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 0d31893b53..a98df95fc3 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -41,6 +41,7 @@ #include "JitAsm.h" #include "JitRegCache.h" #include "Jit64_Tables.h" +#include "HW/ProcessorInterface.h" using namespace Gen; using namespace PowerPC; @@ -569,6 +570,24 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc SetJumpTarget(b1); } + // Add an external exception check if the instruction writes to the FIFO. + if (jit->js.fifoWriteAddresses.find(js.compilerPC) != jit->js.fifoWriteAddresses.end()) + { + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + + TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); + FixupBranch noExtException = J_CC(CC_Z); + TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP)); + FixupBranch noCPInt = J_CC(CC_Z); + + MOV(32, M(&PC), Imm32(js.compilerPC)); + WriteExceptionExit(); + + SetJumpTarget(noCPInt); + SetJumpTarget(noExtException); + } + Jit64Tables::CompileInstruction(ops[i]); if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp index 3a60a0ae11..afab21b30e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp @@ -1260,8 +1260,8 @@ static const std::string opcodeNames[] = { "FResult_End", "StorePaired", "StoreSingle", "StoreDouble", "StoreFReg", "FDCmpCR", "CInt16", "CInt32", "SystemCall", "RFIExit", "InterpreterBranch", "IdleBranch", "ShortIdleLoop", - "FPExceptionCheckStart", "FPExceptionCheckEnd", "ISIException", "Tramp", - "BlockStart", "BlockEnd", "Int3", + "FPExceptionCheckStart", "FPExceptionCheckEnd", "ISIException", "ExtExceptionCheck", + "Tramp", "BlockStart", "BlockEnd", "Int3", }; static const unsigned alwaysUsedList[] = { InterpreterFallback, StoreGReg, StoreCR, StoreLink, StoreCTR, StoreMSR, @@ -1269,7 +1269,7 @@ static const unsigned alwaysUsedList[] = { Store16, Store32, StoreSingle, StoreDouble, StorePaired, StoreFReg, FDCmpCR, BlockStart, BlockEnd, IdleBranch, BranchCond, BranchUncond, ShortIdleLoop, SystemCall, InterpreterBranch, RFIExit, FPExceptionCheckStart, - FPExceptionCheckEnd, ISIException, Int3, Tramp, Nop + FPExceptionCheckEnd, ISIException, ExtExceptionCheck, Int3, Tramp, Nop }; static const unsigned extra8RegList[] = { LoadGReg, LoadCR, LoadGQR, LoadFReg, LoadFRegDENToZero, diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h index aca3dd0e37..284cbe6538 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h @@ -165,10 +165,10 @@ enum Opcode { ShortIdleLoop, // Idle loop seen in homebrew like wii mahjong, // just a branch - // used for MMU, at least until someone + // used for exception checking, at least until someone // has a better idea of integrating it FPExceptionCheckStart, FPExceptionCheckEnd, - ISIException, + ISIException,ExtExceptionCheck, // "Opcode" representing a register too far away to // reference directly; this is a size optimization Tramp, @@ -411,6 +411,9 @@ public: InstLoc EmitISIException(InstLoc dest) { return EmitUOp(ISIException, dest); } + InstLoc EmitExtExceptionCheck(InstLoc pc) { + return EmitUOp(ExtExceptionCheck, pc); + } InstLoc EmitRFIExit() { return FoldZeroOp(RFIExit, 0); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp index e1807cfbb4..6648189809 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR_X86.cpp @@ -50,6 +50,7 @@ The register allocation is linear scan allocation. #include "../../../../Common/Src/CPUDetect.h" #include "MathUtil.h" #include "../../Core.h" +#include "HW/ProcessorInterface.h" static ThunkManager thunks; @@ -761,6 +762,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak case FPExceptionCheckStart: case FPExceptionCheckEnd: case ISIException: + case ExtExceptionCheck: case Int3: case Tramp: // No liveness effects @@ -1920,6 +1922,21 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak Jit->WriteExceptionExit(); break; } + case ExtExceptionCheck: { + unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); + + Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); + FixupBranch noExtException = Jit->J_CC(CC_Z); + Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP)); + FixupBranch noCPInt = Jit->J_CC(CC_Z); + + Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->WriteExceptionExit(); + + Jit->SetJumpTarget(noCPInt); + Jit->SetJumpTarget(noExtException); + break; + } case Int3: { Jit->INT3(); break; diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp index 75755de5cb..96afff1613 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp @@ -648,6 +648,11 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc { ibuild.EmitFPExceptionCheckStart(ibuild.EmitIntConst(ops[i].address)); } + + if (jit->js.fifoWriteAddresses.find(js.compilerPC) != jit->js.fifoWriteAddresses.end()) + { + ibuild.EmitExtExceptionCheck(ibuild.EmitIntConst(ops[i].address)); + } JitILTables::CompileInstruction(ops[i]); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h index b832e96e1f..be80754bdc 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitBase.h @@ -31,6 +31,8 @@ #include "PowerPCDisasm.h" #include "disasm.h" +#include + #define JIT_OPCODE 0 class JitBase : public CPUCoreBase, public EmuCodeBlock @@ -75,6 +77,8 @@ protected: u8* rewriteStart; JitBlock *curBlock; + + std::set fifoWriteAddresses; }; public: diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp index 422369d307..8f5ff5d820 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.cpp @@ -390,13 +390,12 @@ bool JitBlock::ContainsAddress(u32 em_address) } - void JitBlockCache::InvalidateICache(u32 address) + void JitBlockCache::InvalidateICache(u32 address, const u32 length) { - address &= ~0x1f; // destroy JIT blocks // !! this works correctly under assumption that any two overlapping blocks end at the same address std::map, u32>::iterator it1 = block_map.lower_bound(std::make_pair(address, 0)), it2 = it1, it; - while (it2 != block_map.end() && it2->first.second < address + 0x20) + while (it2 != block_map.end() && it2->first.second < address + length) { DestroyBlock(it2->second, true); it2++; @@ -418,17 +417,17 @@ bool JitBlock::ContainsAddress(u32 em_address) if (address & JIT_ICACHE_VMEM_BIT) { u32 cacheaddr = address & JIT_ICACHE_MASK; - memset(iCacheVMEM + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32); + memset(iCacheVMEM + cacheaddr, JIT_ICACHE_INVALID_BYTE, length); } else if (address & JIT_ICACHE_EXRAM_BIT) { u32 cacheaddr = address & JIT_ICACHEEX_MASK; - memset(iCacheEx + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32); + memset(iCacheEx + cacheaddr, JIT_ICACHE_INVALID_BYTE, length); } else { u32 cacheaddr = address & JIT_ICACHE_MASK; - memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, 32); + memset(iCache + cacheaddr, JIT_ICACHE_INVALID_BYTE, length); } #endif } diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h index 91d47a3d0e..7c19310e02 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitCache.h @@ -129,7 +129,7 @@ public: CompiledCode GetCompiledCodeFromBlock(int block_num); // DOES NOT WORK CORRECTLY WITH INLINING - void InvalidateICache(u32 em_address); + void InvalidateICache(u32 address, const u32 length); void DestroyBlock(int block_num, bool invalidate); // Not currently used diff --git a/Source/Core/Core/Src/PowerPC/PPCCache.cpp b/Source/Core/Core/Src/PowerPC/PPCCache.cpp index 704caa4a2c..6956e9db1c 100644 --- a/Source/Core/Core/Src/PowerPC/PPCCache.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCCache.cpp @@ -110,7 +110,7 @@ namespace PowerPC #endif valid[set] = 0; if (jit) - jit->GetBlockCache()->InvalidateICache(addr); + jit->GetBlockCache()->InvalidateICache(addr & ~0x1f, 32); } u32 InstructionCache::ReadInstruction(u32 addr) diff --git a/Source/Core/VideoCommon/Src/CommandProcessor.cpp b/Source/Core/VideoCommon/Src/CommandProcessor.cpp index fc85b8e39d..dbe09aaa0e 100644 --- a/Source/Core/VideoCommon/Src/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/Src/CommandProcessor.cpp @@ -60,7 +60,6 @@ volatile bool interruptSet= false; volatile bool interruptWaiting= false; volatile bool interruptTokenWaiting = false; volatile bool interruptFinishWaiting = false; -volatile bool OnOverflow = false; bool IsOnThread() { @@ -92,7 +91,6 @@ void DoState(PointerWrap &p) p.Do(interruptWaiting); p.Do(interruptTokenWaiting); p.Do(interruptFinishWaiting); - p.Do(OnOverflow); } inline void WriteLow (volatile u32& _reg, u16 lowbits) {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | lowbits);} @@ -135,7 +133,6 @@ void Init() bProcessFifoToLoWatermark = false; bProcessFifoAllDistance = false; isPossibleWaitingSetDrawDone = false; - OnOverflow = false; et_UpdateInterrupts = CoreTiming::RegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper); } @@ -449,26 +446,7 @@ void STACKALIGN GatherPipeBursted() Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE); if (!IsOnThread()) - { RunGpu(); - } - else - { - if(fifo.CPReadWriteDistance == fifo.CPEnd - fifo.CPBase - 32) - { - if(!OnOverflow) - NOTICE_LOG(COMMANDPROCESSOR,"FIFO is almost in overflown, BreakPoint: %i", fifo.bFF_Breakpoint); - OnOverflow = true; - while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && - fifo.CPReadWriteDistance > fifo.CPEnd - fifo.CPBase - 64) - Common::YieldCPU(); - } - else - { - OnOverflow = false; - } - } - _assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase, "FIFO is overflown by GatherPipe !\nCPU thread is too fast!"); diff --git a/Source/Core/VideoCommon/Src/CommandProcessor.h b/Source/Core/VideoCommon/Src/CommandProcessor.h index db6772d66d..bb969fcb95 100644 --- a/Source/Core/VideoCommon/Src/CommandProcessor.h +++ b/Source/Core/VideoCommon/Src/CommandProcessor.h @@ -35,7 +35,6 @@ extern volatile bool interruptSet; extern volatile bool interruptWaiting; extern volatile bool interruptTokenWaiting; extern volatile bool interruptFinishWaiting; -extern volatile bool OnOverflow; // internal hardware addresses enum diff --git a/Source/Core/VideoCommon/Src/Fifo.cpp b/Source/Core/VideoCommon/Src/Fifo.cpp index 842ff49e78..5cb3252816 100644 --- a/Source/Core/VideoCommon/Src/Fifo.cpp +++ b/Source/Core/VideoCommon/Src/Fifo.cpp @@ -137,8 +137,7 @@ void RunGpuLoop() CommandProcessor::SetCpStatus(); // check if we are able to run this buffer - while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && - fifo.CPReadWriteDistance && (!AtBreakpoint() || CommandProcessor::OnOverflow)) + while (!CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) { if (!GpuRunningState) break;