From 62787085e1038cf8bdc8e42c1b7d3df6a270cbbc Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 23 Jun 2023 17:06:39 +0200 Subject: [PATCH] Jit: Add feature flag for performance monitor By making the JIT cache check if the current state of MMCR0 and MMRC1 matches the state they had at the time the JIT block was compiled, we solve a correctness issue (marked in a comment as a speed hack). Not known to affect any games. --- Source/Core/Core/PowerPC/GDBStub.cpp | 2 ++ Source/Core/Core/PowerPC/Gekko.h | 1 + .../Interpreter_SystemRegisters.cpp | 5 ++++ Source/Core/Core/PowerPC/Jit64/Jit.cpp | 5 ++-- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 5 ++-- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 4 +-- Source/Core/Core/PowerPC/PPCAnalyst.cpp | 27 ++++++++++++++++--- Source/Core/Core/PowerPC/PPCTables.cpp | 2 +- Source/Core/Core/PowerPC/PowerPC.cpp | 26 +++++++++++++++--- Source/Core/Core/PowerPC/PowerPC.h | 2 ++ 10 files changed, 63 insertions(+), 16 deletions(-) diff --git a/Source/Core/Core/PowerPC/GDBStub.cpp b/Source/Core/Core/PowerPC/GDBStub.cpp index 35f0ddbe23..5ab058ef82 100644 --- a/Source/Core/Core/PowerPC/GDBStub.cpp +++ b/Source/Core/Core/PowerPC/GDBStub.cpp @@ -761,6 +761,7 @@ static void WriteRegister() break; case 131: ppc_state.spr[SPR_MMCR0] = re32hex(bufptr); + PowerPC::MMCRUpdated(ppc_state); break; case 132: ppc_state.spr[SPR_PMC1] = re32hex(bufptr); @@ -773,6 +774,7 @@ static void WriteRegister() break; case 135: ppc_state.spr[SPR_MMCR1] = re32hex(bufptr); + PowerPC::MMCRUpdated(ppc_state); break; case 136: ppc_state.spr[SPR_PMC3] = re32hex(bufptr); diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h index 1acf6632f2..62a1743b45 100644 --- a/Source/Core/Core/PowerPC/Gekko.h +++ b/Source/Core/Core/PowerPC/Gekko.h @@ -930,6 +930,7 @@ enum CPUEmuFeatureFlags : u32 { FEATURE_FLAG_MSR_DR = 1 << 0, FEATURE_FLAG_MSR_IR = 1 << 1, + FEATURE_FLAG_PERFMON = 1 << 2, }; constexpr s32 SignExt16(s16 x) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index f4afeb6b96..8e8025cec9 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -491,6 +491,11 @@ void Interpreter::mtspr(Interpreter& interpreter, UGeckoInstruction inst) } break; + case SPR_MMCR0: + case SPR_MMCR1: + MMCRUpdated(ppc_state); + break; + case SPR_THRM1: case SPR_THRM2: case SPR_THRM3: diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 1b97ae1851..a6881c1172 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -337,7 +337,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); @@ -353,7 +353,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Reset(js.op->regsOut); fpr.Reset(js.op->GetFregsOut()); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { if (js.isLastInstruction) { @@ -445,7 +445,6 @@ bool Jit64::Cleanup() did_something = true; } - // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex) { ABI_PushRegistersAndAdjustStack({}, 0); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 4f973f5c06..9474805972 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -187,7 +187,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { // also flush the program counter ARM64Reg WA = gpr.GetReg(); @@ -207,7 +207,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) fpr.ResetRegisters(js.op->GetFregsOut()); gpr.ResetCRRegisters(js.op->crOut); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { if (js.isLastInstruction) { @@ -276,7 +276,6 @@ void JitArm64::Cleanup() SetJumpTarget(exit); } - // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex) { ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst, diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 8c327c202e..feb872ee7b 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -130,8 +130,8 @@ class JitBaseBlockCache { public: // The size of the fast map is determined like this: - // ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (2 feature flag bits) - static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x8'0000'0000; + // ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (3 feature flag bits) + static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x10'0000'0000; static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000; static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1; diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index f093863492..04d15b971e 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -202,6 +202,23 @@ static void AnalyzeFunction2(Common::Symbol* func) func->flags = flags; } +static bool IsMtspr(UGeckoInstruction inst) +{ + return inst.OPCD == 31 && inst.SUBOP10 == 467; +} + +static bool IsSprInstructionUsingMmcr(UGeckoInstruction inst) +{ + const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + return index == SPR_MMCR0 || index == SPR_MMCR1; +} + +static bool InstructionCanEndBlock(const CodeOp& op) +{ + return (op.opinfo->flags & FL_ENDBLOCK) && + (!IsMtspr(op.inst) || IsSprInstructionUsingMmcr(op.inst)); +} + bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const { const GekkoOPInfo* a_info = a.opinfo; @@ -222,9 +239,11 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const // [1] https://bugs.dolphin-emu.org/issues/5864#note-7 if (a.canCauseException || b.canCauseException) return false; - if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + if (a.canEndBlock || b.canEndBlock) return false; - if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + if (a_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + return false; + if (b_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE)) return false; if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA))) return false; @@ -597,7 +616,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0; code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0; - code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0; + code->canEndBlock = InstructionCanEndBlock(*code); code->canCauseException = first_fpu_instruction || (opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 || @@ -935,7 +954,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, { // Just pick the next instruction address += 4; - if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) // right now we stop early + if (!conditional_continue && InstructionCanEndBlock(code[i])) // right now we stop early { found_exit = true; break; diff --git a/Source/Core/Core/PowerPC/PPCTables.cpp b/Source/Core/Core/PowerPC/PPCTables.cpp index 9d70fb662b..8eeb5da076 100644 --- a/Source/Core/Core/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/PowerPC/PPCTables.cpp @@ -374,7 +374,7 @@ constexpr std::array s_table31{{ {210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION}, {242, "mtsrin", OpType::System, 1, FL_IN_SB | FL_PROGRAMEXCEPTION}, {339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION}, - {467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_PROGRAMEXCEPTION}, + {467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION}, {371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION}, {512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA}, {595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION}, diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index f83df655e5..b994b00f6e 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -137,7 +137,7 @@ void PowerPCManager::DoState(PointerWrap& p) } RoundingModeUpdated(m_ppc_state); - MSRUpdated(m_ppc_state); + RecalculateAllFeatureFlags(m_ppc_state); auto& mmu = m_system.GetMMU(); mmu.IBATUpdated(); @@ -209,7 +209,7 @@ void PowerPCManager::ResetRegisters() SystemTimers::DecrementerSet(); RoundingModeUpdated(m_ppc_state); - MSRUpdated(m_ppc_state); + RecalculateAllFeatureFlags(m_ppc_state); } void PowerPCManager::InitializeCPUCore(CPUCore cpu_core) @@ -710,7 +710,27 @@ void MSRUpdated(PowerPCState& ppc_state) static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); - ppc_state.feature_flags = static_cast((ppc_state.msr.Hex >> 4) & 0x3); + ppc_state.feature_flags = static_cast( + (ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((ppc_state.msr.Hex >> 4) & 0x3)); +} + +void MMCRUpdated(PowerPCState& ppc_state) +{ + const bool perfmon = ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1]; + ppc_state.feature_flags = static_cast( + (ppc_state.feature_flags & ~FEATURE_FLAG_PERFMON) | (perfmon ? FEATURE_FLAG_PERFMON : 0)); +} + +void RecalculateAllFeatureFlags(PowerPCState& ppc_state) +{ + static_assert(UReg_MSR{}.DR.StartBit() == 4); + static_assert(UReg_MSR{}.IR.StartBit() == 5); + static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); + static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); + + const bool perfmon = ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1]; + ppc_state.feature_flags = static_cast(((ppc_state.msr.Hex >> 4) & 0x3) | + (perfmon ? FEATURE_FLAG_PERFMON : 0)); } void CheckExceptionsFromJIT(PowerPCManager& power_pc) diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 8a1701bb10..afcc0ac8bf 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -349,5 +349,7 @@ void CheckBreakPointsFromJIT(PowerPCManager& power_pc); void RoundingModeUpdated(PowerPCState& ppc_state); void MSRUpdated(PowerPCState& ppc_state); +void MMCRUpdated(PowerPCState& ppc_state); +void RecalculateAllFeatureFlags(PowerPCState& ppc_state); } // namespace PowerPC