diff --git a/Source/Core/Core/Boot/Boot.cpp b/Source/Core/Core/Boot/Boot.cpp index 86501bd910..5fc52585e0 100644 --- a/Source/Core/Core/Boot/Boot.cpp +++ b/Source/Core/Core/Boot/Boot.cpp @@ -460,6 +460,9 @@ bool CBoot::Load_BS2(Core::System& system, const std::string& boot_rom_filename) SetupBAT(system, /*is_wii*/ false); ppc_state.pc = 0x81200150; + + PowerPC::MSRUpdated(ppc_state); + return true; } diff --git a/Source/Core/Core/Boot/Boot_BS2Emu.cpp b/Source/Core/Core/Boot/Boot_BS2Emu.cpp index 8c18e7fd54..86c026f558 100644 --- a/Source/Core/Core/Boot/Boot_BS2Emu.cpp +++ b/Source/Core/Core/Boot/Boot_BS2Emu.cpp @@ -74,6 +74,7 @@ void CBoot::SetupMSR(PowerPC::PowerPCState& ppc_state) ppc_state.msr.DR = 1; ppc_state.msr.IR = 1; ppc_state.msr.FP = 1; + PowerPC::MSRUpdated(ppc_state); } void CBoot::SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii) diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index 32980927dc..dcb80b31d7 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -651,6 +651,8 @@ void FifoPlayer::LoadMemory() ppc_state.spr[SPR_DBAT1U] = 0xc0001fff; ppc_state.spr[SPR_DBAT1L] = 0x0000002a; + PowerPC::MSRUpdated(ppc_state); + auto& mmu = system.GetMMU(); mmu.DBATUpdated(); mmu.IBATUpdated(); diff --git a/Source/Core/Core/IOS/MIOS.cpp b/Source/Core/Core/IOS/MIOS.cpp index 52fcb553c2..63728ff903 100644 --- a/Source/Core/Core/IOS/MIOS.cpp +++ b/Source/Core/Core/IOS/MIOS.cpp @@ -84,10 +84,15 @@ bool Load() } auto& power_pc = system.GetPowerPC(); + const PowerPC::CoreMode core_mode = power_pc.GetMode(); power_pc.SetMode(PowerPC::CoreMode::Interpreter); - power_pc.GetPPCState().msr.Hex = 0; - power_pc.GetPPCState().pc = 0x3400; + + PowerPC::PowerPCState& ppc_state = power_pc.GetPPCState(); + ppc_state.msr.Hex = 0; + ppc_state.pc = 0x3400; + PowerPC::MSRUpdated(ppc_state); + NOTICE_LOG_FMT(IOS, "Loaded MIOS and bootstrapped PPC."); // IOS writes 0 to 
0x30f8 before bootstrapping the PPC. Once started, the IPL eventually writes diff --git a/Source/Core/Core/PowerPC/GDBStub.cpp b/Source/Core/Core/PowerPC/GDBStub.cpp index d88d6a06b0..5ab058ef82 100644 --- a/Source/Core/Core/PowerPC/GDBStub.cpp +++ b/Source/Core/Core/PowerPC/GDBStub.cpp @@ -661,6 +661,7 @@ static void WriteRegister() break; case 65: ppc_state.msr.Hex = re32hex(bufptr); + PowerPC::MSRUpdated(ppc_state); break; case 66: ppc_state.cr.Set(re32hex(bufptr)); @@ -760,6 +761,7 @@ static void WriteRegister() break; case 131: ppc_state.spr[SPR_MMCR0] = re32hex(bufptr); + PowerPC::MMCRUpdated(ppc_state); break; case 132: ppc_state.spr[SPR_PMC1] = re32hex(bufptr); @@ -772,6 +774,7 @@ static void WriteRegister() break; case 135: ppc_state.spr[SPR_MMCR1] = re32hex(bufptr); + PowerPC::MMCRUpdated(ppc_state); break; case 136: ppc_state.spr[SPR_PMC3] = re32hex(bufptr); diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h index 2485424171..62a1743b45 100644 --- a/Source/Core/Core/PowerPC/Gekko.h +++ b/Source/Core/Core/PowerPC/Gekko.h @@ -926,6 +926,13 @@ enum EXCEPTION_FAKE_MEMCHECK_HIT = 0x00000200, }; +enum CPUEmuFeatureFlags : u32 +{ + FEATURE_FLAG_MSR_DR = 1 << 0, + FEATURE_FLAG_MSR_IR = 1 << 1, + FEATURE_FLAG_PERFMON = 1 << 2, +}; + constexpr s32 SignExt16(s16 x) { return (s32)x; diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp index e2cdff4fd6..8b5bac0509 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp @@ -134,6 +134,9 @@ void Interpreter::rfi(Interpreter& interpreter, UGeckoInstruction inst) // else // set NPC to saved offset and resume ppc_state.npc = SRR0(ppc_state); + + PowerPC::MSRUpdated(ppc_state); + interpreter.m_end_block = true; } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp 
b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 80ddc8efc3..8e8025cec9 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -181,6 +181,8 @@ void Interpreter::mtmsr(Interpreter& interpreter, UGeckoInstruction inst) ppc_state.msr.Hex = ppc_state.gpr[inst.RS]; + PowerPC::MSRUpdated(ppc_state); + // FE0/FE1 may have been set CheckFPExceptions(ppc_state); @@ -489,6 +491,11 @@ void Interpreter::mtspr(Interpreter& interpreter, UGeckoInstruction inst) } break; + case SPR_MMCR0: + case SPR_MMCR1: + MMCRUpdated(ppc_state); + break; + case SPR_THRM1: case SPR_THRM2: case SPR_THRM3: diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 251f71c135..5938080b0e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -337,7 +337,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); @@ -353,7 +353,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Reset(js.op->regsOut); fpr.Reset(js.op->GetFregsOut()); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { if (js.isLastInstruction) { @@ -445,8 +445,7 @@ bool Jit64::Cleanup() did_something = true; } - // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. - if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex) + if (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON) { ABI_PushRegistersAndAdjustStack({}, 0); ABI_CallFunctionCCCP(PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst, @@ -483,8 +482,7 @@ void Jit64::FakeBLCall(u32 after) // We may need to fake the BLR stack on inlined CALL instructions. 
// Else we can't return to this location any more. - MOV(64, R(RSCRATCH2), - Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after)); + MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after)); PUSH(RSCRATCH2); FixupBranch skip_exit = CALL(); POP(RSCRATCH2); @@ -497,8 +495,11 @@ void Jit64::EmitUpdateMembase() MOV(64, R(RMEM), PPCSTATE(mem_ptr)); } -void Jit64::EmitStoreMembase(const OpArg& msr, X64Reg scratch_reg) +void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg) { + ASSERT(!msr.IsSimpleReg(scratch_reg)); + + // Update mem_ptr auto& memory = m_system.GetMemory(); if (msr.IsImm()) { @@ -513,6 +514,26 @@ void Jit64::EmitStoreMembase(const OpArg& msr, X64Reg scratch_reg) CMOVcc(64, RMEM, R(scratch_reg), CC_Z); } MOV(64, PPCSTATE(mem_ptr), R(RMEM)); + + // Update feature_flags + static_assert(UReg_MSR{}.DR.StartBit() == 4); + static_assert(UReg_MSR{}.IR.StartBit() == 5); + static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); + static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); + const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3; + if (msr.IsImm()) + { + MOV(32, PPCSTATE(feature_flags), Imm32(other_feature_flags | ((msr.Imm32() >> 4) & 0x3))); + } + else + { + MOV(32, R(scratch_reg), msr); + SHR(32, R(scratch_reg), Imm8(4)); + AND(32, R(scratch_reg), Imm32(0x3)); + if (other_feature_flags != 0) + OR(32, R(scratch_reg), Imm32(other_feature_flags)); + MOV(32, PPCSTATE(feature_flags), R(scratch_reg)); + } } void Jit64::WriteExit(u32 destination, bool bl, u32 after) @@ -524,8 +545,7 @@ void Jit64::WriteExit(u32 destination, bool bl, u32 after) if (bl) { - MOV(64, R(RSCRATCH2), - Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after)); + MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after)); PUSH(RSCRATCH2); } @@ -582,8 +602,7 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after) if (bl) { - MOV(64, R(RSCRATCH2), - Imm64(u64(m_ppc_state.msr.Hex & 
JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after)); + MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after)); PUSH(RSCRATCH2); } @@ -611,10 +630,9 @@ void Jit64::WriteBLRExit() bool disturbed = Cleanup(); if (disturbed) MOV(32, R(RSCRATCH), PPCSTATE(pc)); - const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; - if (msr_bits != 0) + if (m_ppc_state.feature_flags != 0) { - MOV(32, R(RSCRATCH2), Imm32(msr_bits)); + MOV(32, R(RSCRATCH2), Imm32(m_ppc_state.feature_flags)); SHL(64, R(RSCRATCH2), Imm8(32)); OR(64, R(RSCRATCH), R(RSCRATCH2)); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 7f1d107c62..9fd3f9d7f7 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -88,7 +88,7 @@ public: // Utilities for use by opcodes void EmitUpdateMembase(); - void EmitStoreMembase(const Gen::OpArg& msr, Gen::X64Reg scratch_reg); + void MSRUpdated(const Gen::OpArg& msr, Gen::X64Reg scratch_reg); void FakeBLCall(u32 after); void WriteExit(u32 destination, bool bl = false, u32 after = 0); void JustWriteExit(u32 destination, bool bl, u32 after); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index a5d89cd315..525d65cf70 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -19,6 +19,7 @@ #include "Core/System.h" using namespace Gen; + Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit) { } @@ -118,19 +119,17 @@ void Jit64AsmRoutineManager::Generate() { if (m_jit.GetBlockCache()->GetEntryPoints()) { - MOV(32, R(RSCRATCH2), PPCSTATE(msr)); - AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK)); - SHL(64, R(RSCRATCH2), Imm8(28)); + MOV(32, R(RSCRATCH2), PPCSTATE(feature_flags)); + SHL(64, R(RSCRATCH2), Imm8(32)); MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc)); OR(64, R(RSCRATCH_EXTRA), R(RSCRATCH2)); u64 
icache = reinterpret_cast(m_jit.GetBlockCache()->GetEntryPoints()); MOV(64, R(RSCRATCH2), Imm64(icache)); - // The entry points map is indexed by ((msrBits << 26) | (address >> 2)). - // The map contains 8 byte 64-bit pointers and that means we need to shift - // msr left by 29 bits and address left by 1 bit to get the correct offset - // in the map. + // The entry points map is indexed by ((feature_flags << 30) | (pc >> 2)). + // The map contains 8-byte pointers and that means we need to shift feature_flags + // left by 33 bits and pc left by 1 bit to get the correct offset in the map. MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_2, 0)); } else @@ -160,17 +159,17 @@ void Jit64AsmRoutineManager::Generate() if (!m_jit.GetBlockCache()->GetEntryPoints()) { - // Check block.msrBits. - MOV(32, R(RSCRATCH2), PPCSTATE(msr)); - AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK)); + // Check block.feature_flags. + MOV(32, R(RSCRATCH2), PPCSTATE(feature_flags)); // Also check the block.effectiveAddress. RSCRATCH_EXTRA still has the PC. SHL(64, R(RSCRATCH_EXTRA), Imm8(32)); OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); - static_assert(offsetof(JitBlockData, msrBits) + 4 == + static_assert(offsetof(JitBlockData, feature_flags) + 4 == offsetof(JitBlockData, effectiveAddress)); - CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast(offsetof(JitBlockData, msrBits)))); + CMP(64, R(RSCRATCH2), + MDisp(RSCRATCH, static_cast(offsetof(JitBlockData, feature_flags)))); state_mismatch = J_CC(CC_NE); // Success; branch to the block we found. 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 5a24cc2108..20de3a33a4 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -47,6 +47,7 @@ void Jit64::rfi(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); + // See Interpreter rfi for details const u32 mask = 0x87C0FFFF; const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] @@ -56,7 +57,9 @@ void Jit64::rfi(UGeckoInstruction inst) AND(32, R(RSCRATCH), Imm32(mask & clearMSR13)); OR(32, PPCSTATE(msr), R(RSCRATCH)); - EmitStoreMembase(R(RSCRATCH), RSCRATCH2); + // Call MSRUpdated to update feature_flags. Only the bits that come from SRR1 + // are relevant for this, so it's fine to pass in RSCRATCH in place of msr. + MSRUpdated(R(RSCRATCH), RSCRATCH2); // NPC = SRR0; MOV(32, R(RSCRATCH), PPCSTATE_SRR0); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 83313c56d9..6a1dc65141 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -320,7 +320,7 @@ void Jit64::dcbx(UGeckoInstruction inst) FixupBranch bat_lookup_failed; MOV(32, R(effective_address), R(addr)); const u8* loop_start = GetCodePtr(); - if (m_ppc_state.msr.IR) + if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR) { // Translate effective address to physical address. 
bat_lookup_failed = BATAddressLookup(addr, tmp, m_jit.m_mmu.GetIBATTable().data()); @@ -349,7 +349,7 @@ void Jit64::dcbx(UGeckoInstruction inst) SwitchToFarCode(); SetJumpTarget(invalidate_needed); - if (m_ppc_state.msr.IR) + if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR) SetJumpTarget(bat_lookup_failed); BitSet32 registersInUse = CallerSavedRegistersInUse(); @@ -421,7 +421,7 @@ void Jit64::dcbz(UGeckoInstruction inst) end_dcbz_hack = J_CC(CC_L); } - bool emit_fast_path = m_ppc_state.msr.DR && m_jit.jo.fastmem_arena; + bool emit_fast_path = (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR) && m_jit.jo.fastmem_arena; if (emit_fast_path) { diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 41d999631b..91865b61a0 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -23,7 +23,7 @@ void Jit64::psq_stXX(UGeckoInstruction inst) JITDISABLE(bJITLoadStorePairedOff); // For performance, the AsmCommon routines assume address translation is on. - FALLBACK_IF(!m_ppc_state.msr.DR); + FALLBACK_IF(!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR)); s32 offset = inst.SIMM_12; bool indexed = inst.OPCD == 4; @@ -112,7 +112,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst) JITDISABLE(bJITLoadStorePairedOff); // For performance, the AsmCommon routines assume address translation is on. 
- FALLBACK_IF(!m_ppc_state.msr.DR); + FALLBACK_IF(!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR)); s32 offset = inst.SIMM_12; bool indexed = inst.OPCD == 4; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 7d5db65e47..67e218bb98 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -439,7 +439,7 @@ void Jit64::mtmsr(UGeckoInstruction inst) RegCache::Realize(Rs); MOV(32, PPCSTATE(msr), Rs); - EmitStoreMembase(Rs, RSCRATCH2); + MSRUpdated(Rs, RSCRATCH2); } gpr.Flush(); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index c18901eba3..0fa9cfdcc0 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -371,7 +371,8 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, } FixupBranch exit; - const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR; + const bool dr_set = + (flags & SAFE_LOADSTORE_DR_ON) || (m_jit.m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR); const bool fast_check_address = !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; if (fast_check_address) @@ -544,7 +545,8 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces } FixupBranch exit; - const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR; + const bool dr_set = + (flags & SAFE_LOADSTORE_DR_ON) || (m_jit.m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR); const bool fast_check_address = !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; if (fast_check_address) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 9216b506ae..8d7638fde3 100644 --- 
a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -187,7 +187,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { // also flush the program counter ARM64Reg WA = gpr.GetReg(); @@ -207,7 +207,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) fpr.ResetRegisters(js.op->GetFregsOut()); gpr.ResetCRRegisters(js.op->crOut); - if (js.op->opinfo->flags & FL_ENDBLOCK) + if (js.op->canEndBlock) { if (js.isLastInstruction) { @@ -276,8 +276,7 @@ void JitArm64::Cleanup() SetJumpTarget(exit); } - // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. - if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex) + if (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON) { ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst, &m_ppc_state); @@ -348,27 +347,61 @@ void JitArm64::EmitUpdateMembase() LDR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); } -void JitArm64::EmitStoreMembase(u32 msr) +void JitArm64::MSRUpdated(u32 msr) { + // Update mem_ptr auto& memory = m_system.GetMemory(); MOVP2R(MEM_REG, UReg_MSR(msr).DR ? (jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()) : (jo.fastmem ? 
memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase())); STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); + + // Update feature_flags + static_assert(UReg_MSR{}.DR.StartBit() == 4); + static_assert(UReg_MSR{}.IR.StartBit() == 5); + static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); + static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); + const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3; + const u32 feature_flags = other_feature_flags | ((msr >> 4) & 0x3); + if (feature_flags == 0) + { + STR(IndexType::Unsigned, ARM64Reg::WZR, PPC_REG, PPCSTATE_OFF(feature_flags)); + } + else + { + ARM64Reg WA = gpr.GetReg(); + MOVI2R(WA, feature_flags); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); + gpr.Unlock(WA); + } } -void JitArm64::EmitStoreMembase(const ARM64Reg& msr) +void JitArm64::MSRUpdated(ARM64Reg msr) { + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + + // Update mem_ptr auto& memory = m_system.GetMemory(); - ARM64Reg WD = gpr.GetReg(); - ARM64Reg XD = EncodeRegTo64(WD); MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()); - MOVP2R(XD, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()); + MOVP2R(XA, jo.fastmem ? 
memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()); TST(msr, LogicalImm(1 << (31 - 27), 32)); - CSEL(MEM_REG, MEM_REG, XD, CCFlags::CC_NEQ); + CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ); STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); - gpr.Unlock(WD); + + // Update feature_flags + static_assert(UReg_MSR{}.DR.StartBit() == 4); + static_assert(UReg_MSR{}.IR.StartBit() == 5); + static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); + static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); + const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3; + UBFX(WA, msr, 4, 2); + if (other_feature_flags != 0) + ORR(WA, WA, LogicalImm(32, other_feature_flags)); + STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); + + gpr.Unlock(WA); } void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return, @@ -383,20 +416,20 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return const u8* host_address_after_return; if (LK) { - // Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack + // Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack ARM64Reg reg_to_push = ARM64Reg::X1; - const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; + const u64 feature_flags = m_ppc_state.feature_flags; if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) { - MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return); + MOVI2R(ARM64Reg::X1, feature_flags << 32 | exit_address_after_return); } - else if (msr_bits == 0) + else if (feature_flags == 0) { reg_to_push = EncodeRegTo64(exit_address_after_return_reg); } else { - ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, + ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32, ARM64Reg::X1); } constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2; @@ -487,20 +520,20 @@ void 
JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte } else { - // Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack + // Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack ARM64Reg reg_to_push = ARM64Reg::X1; - const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; + const u64 feature_flags = m_ppc_state.feature_flags; if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) { - MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return); + MOVI2R(ARM64Reg::X1, feature_flags << 32 | exit_address_after_return); } - else if (msr_bits == 0) + else if (feature_flags == 0) { reg_to_push = EncodeRegTo64(exit_address_after_return_reg); } else { - ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, + ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32, ARM64Reg::X1); } constexpr s32 adr_offset = sizeof(u32) * 3; @@ -558,17 +591,17 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a // function has been called! 
gpr.Lock(ARM64Reg::W30); } - // Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack + // Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack ARM64Reg after_reg = ARM64Reg::INVALID_REG; ARM64Reg reg_to_push; - const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; + const u64 feature_flags = m_ppc_state.feature_flags; if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) { after_reg = gpr.GetReg(); reg_to_push = EncodeRegTo64(after_reg); - MOVI2R(reg_to_push, msr_bits << 32 | exit_address_after_return); + MOVI2R(reg_to_push, feature_flags << 32 | exit_address_after_return); } - else if (msr_bits == 0) + else if (feature_flags == 0) { reg_to_push = EncodeRegTo64(exit_address_after_return_reg); } @@ -576,7 +609,8 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a { after_reg = gpr.GetReg(); reg_to_push = EncodeRegTo64(after_reg); - ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, reg_to_push); + ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32, + reg_to_push); } ARM64Reg code_reg = gpr.GetReg(); constexpr s32 adr_offset = sizeof(u32) * 3; @@ -640,16 +674,16 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest) Cleanup(); EndTimeProfile(js.curBlock); - // Check if {PPC_PC, MSR_BITS} matches the current state, then RET to ARM_PC. + // Check if {PPC_PC, feature_flags} matches the current state, then RET to ARM_PC. 
LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16); - const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; - if (msr_bits == 0) + const u64 feature_flags = m_ppc_state.feature_flags; + if (feature_flags == 0) { CMP(ARM64Reg::X1, EncodeRegTo64(DISPATCHER_PC)); } else { - ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), msr_bits << 32, ARM64Reg::X0); + ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), feature_flags << 32, ARM64Reg::X0); CMP(ARM64Reg::X1, ARM64Reg::X0); } FixupBranch no_match = B(CC_NEQ); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 0096d56654..4aa9b91cb6 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -310,8 +310,8 @@ protected: void EndTimeProfile(JitBlock* b); void EmitUpdateMembase(); - void EmitStoreMembase(u32 msr); - void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr); + void MSRUpdated(u32 msr); + void MSRUpdated(Arm64Gen::ARM64Reg msr); // Exits void diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp index a1c211718c..8eaad6f98f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -64,11 +64,11 @@ void JitArm64::rfi(UGeckoInstruction inst) ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1 STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA + gpr.Unlock(WB, WC); - EmitStoreMembase(WA); + MSRUpdated(WA); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR0)); - gpr.Unlock(WB, WC); WriteExceptionExit(WA); gpr.Unlock(WA); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 483f10f088..1ecfea4e4f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp 
@@ -727,7 +727,7 @@ void JitArm64::dcbx(UGeckoInstruction inst) // Translate effective address to physical address. const u8* loop_start = GetCodePtr(); FixupBranch bat_lookup_failed; - if (m_ppc_state.msr.IR) + if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR) { bat_lookup_failed = BATAddressLookup(physical_addr, effective_addr, WA, m_mmu.GetIBATTable().data()); @@ -756,7 +756,7 @@ void JitArm64::dcbx(UGeckoInstruction inst) SwitchToFarCode(); SetJumpTarget(invalidate_needed); - if (m_ppc_state.msr.IR) + if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR) SetJumpTarget(bat_lookup_failed); BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 1f3426bd47..52bb74658c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -23,7 +23,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) JITDISABLE(bJITLoadStorePairedOff); // If fastmem is enabled, the asm routines assume address translation is on. - FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR); + FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && + !(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR)); // X30 is LR // X0 is the address @@ -151,7 +152,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) JITDISABLE(bJITLoadStorePairedOff); // If fastmem is enabled, the asm routines assume address translation is on. 
- FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR); + FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && + !(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR)); // X30 is LR // X0 contains the scale diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 9e229b4767..e9dcc4f3fb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -94,12 +94,12 @@ void JitArm64::mtmsr(UGeckoInstruction inst) const bool imm_value = gpr.IsImm(inst.RS); if (imm_value) - EmitStoreMembase(gpr.GetImm(inst.RS)); + MSRUpdated(gpr.GetImm(inst.RS)); STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr)); if (!imm_value) - EmitStoreMembase(gpr.R(inst.RS)); + MSRUpdated(gpr.R(inst.RS)); gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 350edc2ba4..0a0f5e2d2e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -100,15 +100,20 @@ void JitArm64::GenerateAsm() if (GetBlockCache()->GetEntryPoints()) { // Check if there is a block - ARM64Reg pc_and_msr = ARM64Reg::X8; - ARM64Reg cache_base = ARM64Reg::X9; - ARM64Reg block = ARM64Reg::X10; - LDR(IndexType::Unsigned, EncodeRegTo32(pc_and_msr), PPC_REG, PPCSTATE_OFF(msr)); + ARM64Reg feature_flags = ARM64Reg::W8; + ARM64Reg pc_and_feature_flags = ARM64Reg::X9; + ARM64Reg cache_base = ARM64Reg::X10; + ARM64Reg block = ARM64Reg::X11; + + LDR(IndexType::Unsigned, feature_flags, PPC_REG, PPCSTATE_OFF(feature_flags)); MOVP2R(cache_base, GetBlockCache()->GetEntryPoints()); - // The entry points map is indexed by ((msrBits << 26) | (address >> 2)). 
- UBFIZ(pc_and_msr, pc_and_msr, 26, 6); - BFXIL(pc_and_msr, EncodeRegTo64(DISPATCHER_PC), 2, 30); - LDR(block, cache_base, ArithOption(pc_and_msr, true)); + // The entry points map is indexed by ((feature_flags << 30) | (pc >> 2)). + // The map contains 8-byte pointers and that means we need to shift feature_flags + // left by 33 bits and pc left by 1 bit to get the correct offset in the map. + LSL(pc_and_feature_flags, EncodeRegTo64(DISPATCHER_PC), 1); + BFI(pc_and_feature_flags, EncodeRegTo64(feature_flags), 33, 31); + LDR(block, cache_base, pc_and_feature_flags); + FixupBranch not_found = CBZ(block); BR(block); SetJumpTarget(not_found); @@ -119,8 +124,8 @@ void JitArm64::GenerateAsm() ARM64Reg cache_base = ARM64Reg::X9; ARM64Reg block = ARM64Reg::X10; ARM64Reg pc = ARM64Reg::W11; - ARM64Reg msr = ARM64Reg::W12; - ARM64Reg msr2 = ARM64Reg::W13; + ARM64Reg feature_flags = ARM64Reg::W12; + ARM64Reg feature_flags_2 = ARM64Reg::W13; ARM64Reg entry = ARM64Reg::X14; // iCache[(address >> 2) & iCache_Mask]; @@ -130,25 +135,24 @@ void JitArm64::GenerateAsm() LDR(block, cache_base, ArithOption(EncodeRegTo64(pc_masked), true)); FixupBranch not_found = CBZ(block); - // b.effectiveAddress != addr || b.msrBits != msr - static_assert(offsetof(JitBlockData, msrBits) + 4 == + // b.effectiveAddress != addr || b.feature_flags != feature_flags + static_assert(offsetof(JitBlockData, feature_flags) + 4 == offsetof(JitBlockData, effectiveAddress)); - LDP(IndexType::Signed, msr, pc, block, offsetof(JitBlockData, msrBits)); - LDR(IndexType::Unsigned, msr2, PPC_REG, PPCSTATE_OFF(msr)); + LDP(IndexType::Signed, feature_flags, pc, block, offsetof(JitBlockData, feature_flags)); + LDR(IndexType::Unsigned, feature_flags_2, PPC_REG, PPCSTATE_OFF(feature_flags)); CMP(pc, DISPATCHER_PC); FixupBranch pc_mismatch = B(CC_NEQ); LDR(IndexType::Unsigned, entry, block, offsetof(JitBlockData, normalEntry)); - AND(msr2, msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32)); - CMP(msr, msr2); - 
FixupBranch msr_mismatch = B(CC_NEQ); + CMP(feature_flags, feature_flags_2); + FixupBranch feature_flags_mismatch = B(CC_NEQ); // return blocks[block_num].normalEntry; BR(entry); SetJumpTarget(not_found); SetJumpTarget(pc_mismatch); - SetJumpTarget(msr_mismatch); + SetJumpTarget(feature_flags_mismatch); } } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index c8dddbc4ae..04cb3f49f2 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -110,7 +110,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) JitBlock& b = block_map.emplace(physical_address, JitBlock())->second; b.effectiveAddress = em_address; b.physicalAddress = physical_address; - b.msrBits = m_jit.m_ppc_state.msr.Hex & JIT_CACHE_MSR_MASK; + b.feature_flags = m_jit.m_ppc_state.feature_flags; b.linkData.clear(); b.fast_block_map_index = 0; return &b; @@ -119,7 +119,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address) void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, const std::set& physical_addresses) { - size_t index = FastLookupIndexForAddress(block.effectiveAddress, block.msrBits); + size_t index = FastLookupIndexForAddress(block.effectiveAddress, block.feature_flags); if (m_entry_points_ptr) m_entry_points_ptr[index] = block.normalEntry; else @@ -159,10 +159,10 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, } } -JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) +JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, CPUEmuFeatureFlags feature_flags) { u32 translated_addr = addr; - if (UReg_MSR(msr).IR) + if (feature_flags & FEATURE_FLAG_MSR_IR) { auto translated = m_jit.m_mmu.JitCache_TranslateAddress(addr); if (!translated.valid) @@ -176,7 +176,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) for (; iter.first != iter.second; iter.first++) { 
JitBlock& b = iter.first->second; - if (b.effectiveAddress == addr && b.msrBits == (msr & JIT_CACHE_MSR_MASK)) + if (b.effectiveAddress == addr && b.feature_flags == feature_flags) return &b; } @@ -189,15 +189,14 @@ const u8* JitBaseBlockCache::Dispatch() if (m_entry_points_ptr) { u8* entry_point = - m_entry_points_ptr[FastLookupIndexForAddress(ppc_state.pc, ppc_state.msr.Hex)]; + m_entry_points_ptr[FastLookupIndexForAddress(ppc_state.pc, ppc_state.feature_flags)]; if (entry_point) { return entry_point; } else { - JitBlock* block = - MoveBlockIntoFastCache(ppc_state.pc, ppc_state.msr.Hex & JIT_CACHE_MSR_MASK); + JitBlock* block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.feature_flags); if (!block) return nullptr; @@ -207,12 +206,12 @@ const u8* JitBaseBlockCache::Dispatch() } JitBlock* block = - m_fast_block_map_fallback[FastLookupIndexForAddress(ppc_state.pc, ppc_state.msr.Hex)]; + m_fast_block_map_fallback[FastLookupIndexForAddress(ppc_state.pc, ppc_state.feature_flags)]; if (!block || block->effectiveAddress != ppc_state.pc || - block->msrBits != (ppc_state.msr.Hex & JIT_CACHE_MSR_MASK)) + block->feature_flags != ppc_state.feature_flags) { - block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.msr.Hex & JIT_CACHE_MSR_MASK); + block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.feature_flags); } if (!block) @@ -374,7 +373,7 @@ void JitBaseBlockCache::LinkBlockExits(JitBlock& block) { if (!e.linkStatus) { - JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.msrBits); + JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.feature_flags); if (destinationBlock) { WriteLinkBlock(e, destinationBlock); @@ -393,7 +392,7 @@ void JitBaseBlockCache::LinkBlock(JitBlock& block) for (JitBlock* b2 : it->second) { - if (block.msrBits == b2->msrBits) + if (block.feature_flags == b2->feature_flags) LinkBlockExits(*b2); } } @@ -412,7 +411,7 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block) return; for (JitBlock* 
sourceBlock : it->second) { - if (sourceBlock->msrBits != block.msrBits) + if (sourceBlock->feature_flags != block.feature_flags) continue; for (auto& e : sourceBlock->linkData) @@ -460,9 +459,9 @@ void JitBaseBlockCache::DestroyBlock(JitBlock& block) WriteDestroyBlock(block); } -JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) +JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, CPUEmuFeatureFlags feature_flags) { - JitBlock* block = GetBlockFromStartAddress(addr, msr); + JitBlock* block = GetBlockFromStartAddress(addr, feature_flags); if (!block) return nullptr; @@ -484,7 +483,7 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) } // And create a new one - size_t index = FastLookupIndexForAddress(addr, msr); + size_t index = FastLookupIndexForAddress(addr, feature_flags); if (m_entry_points_ptr) m_entry_points_ptr[index] = block->normalEntry; else @@ -494,11 +493,11 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) return block; } -size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address, u32 msr) +size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address, u32 feature_flags) { if (m_entry_points_ptr) { - return ((msr & JIT_CACHE_MSR_MASK) << 26) | (address >> 2); + return (feature_flags << 30) | (address >> 2); } else { diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index de1486ceb2..feb872ee7b 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -17,6 +17,7 @@ #include "Common/CommonTypes.h" #include "Core/HW/Memmap.h" +#include "Core/PowerPC/Gekko.h" class JitBase; @@ -33,8 +34,8 @@ struct JitBlockData // The normal entry point for the block, returned by Dispatch(). u8* normalEntry; - // The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK. - u32 msrBits; + // The features that this block was compiled with support for. 
+ CPUEmuFeatureFlags feature_flags; // The effective address (PC) for the beginning of the block. u32 effectiveAddress; // The physical address of the code represented by this block. @@ -48,8 +49,8 @@ struct JitBlockData // The number of PPC instructions represented by this block. Mostly // useful for logging. u32 originalSize; - // This tracks the position if this block within the fast block cache. - // We allow each block to have only one map entry. + // This tracks the position of this block within the fast block cache. + // We only allow each block to have one map entry. size_t fast_block_map_index; }; static_assert(std::is_standard_layout_v<JitBlockData>, "JitBlockData must have a standard layout"); @@ -128,13 +129,9 @@ public: class JitBaseBlockCache { public: - // Mask for the MSR bits which determine whether a compiled block - // is valid (MSR.IR and MSR.DR, the address translation bits). - static constexpr u32 JIT_CACHE_MSR_MASK = 0x30; - - // The value for the map is determined like this: - // ((4 GB guest memory space) / (4 bytes per address) * sizeof(JitBlock*)) * (4 for 2 bits of msr) - static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x8'0000'0000; + // The size of the fast map is determined like this: + // ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (3 feature flag bits) + static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x10'0000'0000; static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000; static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1; @@ -157,7 +154,7 @@ public: // Look for the block in the slow but accurate way. // This function shall be used if FastLookupIndexForAddress() failed. // This might return nullptr if there is no such block. - JitBlock* GetBlockFromStartAddress(u32 em_address, u32 msr); + JitBlock* GetBlockFromStartAddress(u32 em_address, CPUEmuFeatureFlags feature_flags); // Get the normal entry for the block associated with the current program // counter.
This will JIT code if necessary. (This is the reference @@ -185,7 +182,7 @@ private: void UnlinkBlock(const JitBlock& block); void InvalidateICacheInternal(u32 physical_address, u32 address, u32 length, bool forced); - JitBlock* MoveBlockIntoFastCache(u32 em_address, u32 msr); + JitBlock* MoveBlockIntoFastCache(u32 em_address, CPUEmuFeatureFlags feature_flags); // Fast but risky block lookup based on fast_block_map. size_t FastLookupIndexForAddress(u32 address, u32 msr); diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index 8e31ad9c4f..d44a648fd6 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -187,12 +187,14 @@ JitInterface::GetHostCode(u32 address) const } auto& ppc_state = m_system.GetPPCState(); - JitBlock* block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address, ppc_state.msr.Hex); + JitBlock* block = + m_jit->GetBlockCache()->GetBlockFromStartAddress(address, ppc_state.feature_flags); if (!block) { for (int i = 0; i < 500; i++) { - block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address - 4 * i, ppc_state.msr.Hex); + block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address - 4 * i, + ppc_state.feature_flags); if (block) break; } diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index cba02f2b53..eaf49c7c46 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -202,6 +202,23 @@ static void AnalyzeFunction2(Common::Symbol* func) func->flags = flags; } +static bool IsMtspr(UGeckoInstruction inst) +{ + return inst.OPCD == 31 && inst.SUBOP10 == 467; +} + +static bool IsSprInstructionUsingMmcr(UGeckoInstruction inst) +{ + const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + return index == SPR_MMCR0 || index == SPR_MMCR1; +} + +static bool InstructionCanEndBlock(const CodeOp& op) +{ + return (op.opinfo->flags & FL_ENDBLOCK) && + 
(!IsMtspr(op.inst) || IsSprInstructionUsingMmcr(op.inst)); +} + bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const { const GekkoOPInfo* a_info = a.opinfo; @@ -222,9 +239,11 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const // [1] https://bugs.dolphin-emu.org/issues/5864#note-7 if (a.canCauseException || b.canCauseException) return false; - if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + if (a.canEndBlock || b.canEndBlock) return false; - if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + if (a_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE)) + return false; + if (b_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE)) return false; if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA))) return false; @@ -597,7 +616,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0; code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0; - code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0; + code->canEndBlock = InstructionCanEndBlock(*code); code->canCauseException = first_fpu_instruction || (opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 || @@ -935,7 +954,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, { // Just pick the next instruction address += 4; - if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) // right now we stop early + if (!conditional_continue && InstructionCanEndBlock(code[i])) // right now we stop early { found_exit = true; break; diff --git a/Source/Core/Core/PowerPC/PPCTables.cpp b/Source/Core/Core/PowerPC/PPCTables.cpp index 9d70fb662b..8eeb5da076 100644 --- a/Source/Core/Core/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/PowerPC/PPCTables.cpp @@ -374,7 +374,7 @@ constexpr std::array s_table31{{ {210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION}, {242, "mtsrin", OpType::System, 1, FL_IN_SB | 
FL_PROGRAMEXCEPTION}, {339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION}, - {467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_PROGRAMEXCEPTION}, + {467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION}, {371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION}, {512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA}, {595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION}, diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 335658f532..b994b00f6e 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -137,6 +137,7 @@ void PowerPCManager::DoState(PointerWrap& p) } RoundingModeUpdated(m_ppc_state); + RecalculateAllFeatureFlags(m_ppc_state); auto& mmu = m_system.GetMMU(); mmu.IBATUpdated(); @@ -194,8 +195,6 @@ void PowerPCManager::ResetRegisters() } m_ppc_state.SetXER({}); - RoundingModeUpdated(m_ppc_state); - auto& mmu = m_system.GetMMU(); mmu.DBATUpdated(); mmu.IBATUpdated(); @@ -208,6 +207,9 @@ void PowerPCManager::ResetRegisters() m_ppc_state.msr.Hex = 0; m_ppc_state.spr[SPR_DEC] = 0xFFFFFFFF; SystemTimers::DecrementerSet(); + + RoundingModeUpdated(m_ppc_state); + RecalculateAllFeatureFlags(m_ppc_state); } void PowerPCManager::InitializeCPUCore(CPUCore cpu_core) @@ -581,15 +583,15 @@ void PowerPCManager::CheckExceptions() DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT"); m_ppc_state.Exceptions &= ~EXCEPTION_ALIGNMENT; } - - // EXTERNAL INTERRUPT else { + // EXTERNAL INTERRUPT CheckExternalExceptions(); return; } m_system.GetJitInterface().UpdateMembase(); + MSRUpdated(m_ppc_state); } void PowerPCManager::CheckExternalExceptions() @@ -642,6 +644,7 @@ void PowerPCManager::CheckExternalExceptions() ERROR_LOG_FMT(POWERPC, "Unknown EXTERNAL INTERRUPT exception: Exceptions == {:08x}", exceptions); } + MSRUpdated(m_ppc_state); } m_system.GetJitInterface().UpdateMembase(); @@ -700,6 +703,36 @@ void 
RoundingModeUpdated(PowerPCState& ppc_state) Common::FPU::SetSIMDMode(ppc_state.fpscr.RN, ppc_state.fpscr.NI); } +void MSRUpdated(PowerPCState& ppc_state) +{ + static_assert(UReg_MSR{}.DR.StartBit() == 4); + static_assert(UReg_MSR{}.IR.StartBit() == 5); + static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); + static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); + + ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>( + (ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((ppc_state.msr.Hex >> 4) & 0x3)); +} + +void MMCRUpdated(PowerPCState& ppc_state) +{ + const bool perfmon = ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1]; + ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>( + (ppc_state.feature_flags & ~FEATURE_FLAG_PERFMON) | (perfmon ? FEATURE_FLAG_PERFMON : 0)); +} + +void RecalculateAllFeatureFlags(PowerPCState& ppc_state) +{ + static_assert(UReg_MSR{}.DR.StartBit() == 4); + static_assert(UReg_MSR{}.IR.StartBit() == 5); + static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); + static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); + + const bool perfmon = ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1]; + ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(((ppc_state.msr.Hex >> 4) & 0x3) | + (perfmon ? FEATURE_FLAG_PERFMON : 0)); +} + void CheckExceptionsFromJIT(PowerPCManager& power_pc) { power_pc.CheckExceptions(); diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 211d0f37e5..afcc0ac8bf 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -141,6 +141,8 @@ struct PowerPCState UReg_MSR msr; // machine state register UReg_FPSCR fpscr; // floating point flags/status bits + CPUEmuFeatureFlags feature_flags; + // Exception management.
u32 Exceptions = 0; @@ -346,5 +348,8 @@ void CheckBreakPointsFromJIT(PowerPCManager& power_pc); #define TU(ppc_state) (ppc_state).spr[SPR_TU] void RoundingModeUpdated(PowerPCState& ppc_state); +void MSRUpdated(PowerPCState& ppc_state); +void MMCRUpdated(PowerPCState& ppc_state); +void RecalculateAllFeatureFlags(PowerPCState& ppc_state); } // namespace PowerPC diff --git a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp index fa31e620fc..b9ab4ad4dd 100644 --- a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp +++ b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp @@ -448,7 +448,10 @@ void RegisterWidget::PopulateTable() // MSR AddRegister( 23, 5, RegisterType::msr, "MSR", [this] { return m_system.GetPPCState().msr.Hex; }, - [this](u64 value) { m_system.GetPPCState().msr.Hex = value; }); + [this](u64 value) { + m_system.GetPPCState().msr.Hex = value; + PowerPC::MSRUpdated(m_system.GetPPCState()); + }); // SRR 0-1 AddRegister(