diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 137e7b6ab4..251f71c135 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -483,7 +483,8 @@ void Jit64::FakeBLCall(u32 after) // We may need to fake the BLR stack on inlined CALL instructions. // Else we can't return to this location any more. - MOV(32, R(RSCRATCH2), Imm32(after)); + MOV(64, R(RSCRATCH2), + Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after)); PUSH(RSCRATCH2); FixupBranch skip_exit = CALL(); POP(RSCRATCH2); @@ -523,7 +524,8 @@ void Jit64::WriteExit(u32 destination, bool bl, u32 after) if (bl) { - MOV(32, R(RSCRATCH2), Imm32(after)); + MOV(64, R(RSCRATCH2), + Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after)); PUSH(RSCRATCH2); } @@ -580,7 +582,8 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after) if (bl) { - MOV(32, R(RSCRATCH2), Imm32(after)); + MOV(64, R(RSCRATCH2), + Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after)); PUSH(RSCRATCH2); } @@ -608,6 +611,13 @@ void Jit64::WriteBLRExit() bool disturbed = Cleanup(); if (disturbed) MOV(32, R(RSCRATCH), PPCSTATE(pc)); + const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; + if (msr_bits != 0) + { + MOV(32, R(RSCRATCH2), Imm32(msr_bits)); + SHL(64, R(RSCRATCH2), Imm8(32)); + OR(64, R(RSCRATCH), R(RSCRATCH2)); + } MOV(32, R(RSCRATCH2), Imm32(js.downcountAmount)); CMP(64, R(RSCRATCH), MDisp(RSP, 8)); J_CC(CC_NE, asm_routines.dispatcher_mispredicted_blr); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 48e6879dce..c7cadd2a3b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -445,10 +445,6 @@ void Jit64::mtmsr(UGeckoInstruction inst) gpr.Flush(); fpr.Flush(); - // Our jit cache also stores some MSR bits, as they have changed, we either - // have to validate them in the BLR/RET check, or just flush the stack here. - asm_routines.ResetStack(*this); - // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 798b7c88a6..1e69976bf7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -382,12 +382,21 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return const u8* host_address_after_return; if (LK) { - // Push {ARM_PC; PPC_PC} on the stack - ARM64Reg reg_to_push = exit_address_after_return_reg; + // Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack + ARM64Reg reg_to_push = ARM64Reg::X1; + const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) { - MOVI2R(ARM64Reg::X1, exit_address_after_return); - reg_to_push = ARM64Reg::X1; + MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return); + } + else if (msr_bits == 0) + { + reg_to_push = EncodeRegTo64(exit_address_after_return_reg); + } + else + { + ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, + ARM64Reg::X1); } constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2; host_address_after_return = GetCodePtr() + adr_offset; @@ -477,14 +486,22 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte } else { - // Push {ARM_PC, PPC_PC} on the stack - ARM64Reg reg_to_push = exit_address_after_return_reg; + // Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack + ARM64Reg reg_to_push = ARM64Reg::X1; + const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) { - MOVI2R(ARM64Reg::X1, exit_address_after_return); - reg_to_push = ARM64Reg::X1; + MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return); + } + else if (msr_bits == 0) + { + reg_to_push = EncodeRegTo64(exit_address_after_return_reg); + } + else + { + ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, + ARM64Reg::X1); } - MOVI2R(ARM64Reg::X1, exit_address_after_return); constexpr s32 adr_offset = sizeof(u32) * 3; const u8* host_address_after_return = GetCodePtr() + adr_offset; ADR(ARM64Reg::X0, adr_offset); @@ -540,19 +557,33 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a // function has been called! gpr.Lock(ARM64Reg::W30); } - ARM64Reg after_reg = exit_address_after_return_reg; + // Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack + ARM64Reg after_reg = ARM64Reg::INVALID_REG; + ARM64Reg reg_to_push; + const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) { after_reg = gpr.GetReg(); - MOVI2R(after_reg, exit_address_after_return); + reg_to_push = EncodeRegTo64(after_reg); + MOVI2R(reg_to_push, msr_bits << 32 | exit_address_after_return); + } + else if (msr_bits == 0) + { + reg_to_push = EncodeRegTo64(exit_address_after_return_reg); + } + else + { + after_reg = gpr.GetReg(); + reg_to_push = EncodeRegTo64(after_reg); + ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, reg_to_push); } ARM64Reg code_reg = gpr.GetReg(); constexpr s32 adr_offset = sizeof(u32) * 3; const u8* host_address_after_return = GetCodePtr() + adr_offset; ADR(EncodeRegTo64(code_reg), adr_offset); - STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16); + STP(IndexType::Pre, EncodeRegTo64(code_reg), reg_to_push, ARM64Reg::SP, -16); gpr.Unlock(code_reg); - if (after_reg != exit_address_after_return_reg) + if (after_reg != ARM64Reg::INVALID_REG) gpr.Unlock(after_reg); FixupBranch skip_exit = BL(); @@ -608,9 +639,18 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest) Cleanup(); EndTimeProfile(js.curBlock); - // Check if {ARM_PC, PPC_PC} matches the current state. + // Check if {PPC_PC, MSR_BITS} matches the current state, then RET to ARM_PC. LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16); - CMP(ARM64Reg::W1, DISPATCHER_PC); + const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; + if (msr_bits == 0) + { + CMP(ARM64Reg::X1, EncodeRegTo64(DISPATCHER_PC)); + } + else + { + ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), msr_bits << 32, ARM64Reg::X0); + CMP(ARM64Reg::X1, ARM64Reg::X0); + } FixupBranch no_match = B(CC_NEQ); DoDownCount(); // overwrites X0 + X1 diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 2802343b3a..9e229b4767 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -104,10 +104,6 @@ void JitArm64::mtmsr(UGeckoInstruction inst) gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - // Our jit cache also stores some MSR bits, as they have changed, we either - // have to validate them in the BLR/RET check, or just flush the stack here. - ResetStack(); - WriteExceptionExit(js.compilerPC + 4, true); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index a10f4ae5d0..0adb657841 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -50,7 +50,7 @@ void JitArm64::GenerateAsm() STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer)); // Push {nullptr; -1} as invalid destination on the stack. - MOVI2R(ARM64Reg::X0, 0xFFFFFFFF); + MOVI2R(ARM64Reg::X0, 0xFFFF'FFFF'FFFF'FFFF); STP(IndexType::Pre, ARM64Reg::ZR, ARM64Reg::X0, ARM64Reg::SP, -16); // The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance().