Jit: Further improve flushing for skipped instructions

Now we no longer have to wait until we've compiled the next instruction
after a skipped instruction before we can flush registers.

This commit is easier to read using "Ignore whitespace".
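In effect, the diff below moves the js.skipInstructions handling to the top of the per-instruction loop and hoists the end-of-instruction register discard/flush out of the non-skip branch (previously the loop index was only advanced past skipped instructions at the very end). The following is a minimal, hypothetical sketch of that shape; the types and helper names (Op, JitState, FlushDeadRegisters, DoJitSketch) are illustrative stand-ins, not Dolphin's actual API.

#include <cstdint>
#include <vector>

struct Op
{
  uint32_t regs_in_use = 0;  // registers still live after this instruction
};

struct JitState
{
  int skipInstructions = 0;  // set while compiling e.g. a merged branch sequence
};

// Stand-in for the real emitter; compiling an instruction may request that
// following instructions be skipped.
void CompileInstruction(const Op&, JitState&) {}

// Stand-in for the register cache: a real implementation would flush
// (in_use_before & ~in_use_now); here we only update the tracker.
void FlushDeadRegisters(uint32_t in_use_now, uint32_t& in_use_before)
{
  in_use_before = in_use_now;
}

void DoJitSketch(const std::vector<Op>& ops)
{
  JitState js;
  uint32_t previous_in_use = 0;

  for (const Op& op : ops)
  {
    if (js.skipInstructions != 0)
    {
      // Old behaviour: the loop index was advanced past skipped instructions
      // at the very end, so their registers were only flushed once the next
      // compiled instruction was reached. New behaviour: consume one skipped
      // instruction here and fall through to the shared flushing below.
      js.skipInstructions--;
    }
    else
    {
      CompileInstruction(op, js);
    }

    // Shared end-of-instruction bookkeeping, now reached for skipped
    // instructions as well.
    FlushDeadRegisters(op.regs_in_use, previous_in_use);
  }
}
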
JosJuice 2024-05-29 22:07:15 +02:00
parent c04d8415b3
commit 7dddc39068
2 changed files with 348 additions and 337 deletions

@@ -979,207 +979,216 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
js.isLastInstruction = true;
}
if (i != 0)
if (js.skipInstructions != 0)
{
// Gather pipe writes using a non-immediate address are discovered by profiling.
const u32 prev_address = m_code_buffer[i - 1].address;
bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
// Gather pipe writes using an immediate address are explicitly tracked.
if (jo.optimizeGatherPipe &&
(js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
{
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionP(GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
gatherPipeIntCheck = true;
}
// Gather pipe writes can generate an exception; add an exception check.
// TODO: This doesn't really match hardware; the CP interrupt is
// asynchronous.
if (gatherPipeIntCheck)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch extException = J_CC(CC_NZ, Jump::Near);
SwitchToFarCode();
SetJumpTarget(extException);
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = J_CC(CC_Z, Jump::Near);
MOV(64, R(RSCRATCH), ImmPtr(&m_system.GetProcessorInterface().m_interrupt_cause));
TEST(32, MatR(RSCRATCH),
Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = J_CC(CC_Z, Jump::Near);
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(op.address));
WriteExternalExceptionExit();
}
SwitchToNearCode();
SetJumpTarget(noCPInt);
SetJumpTarget(noExtIntEnable);
}
}
if (HandleFunctionHooking(op.address))
break;
if (op.skip)
{
if (IsDebuggingEnabled())
{
// The only thing that currently sets op.skip is the BLR following optimization.
// If any non-branch instruction starts setting that too, this will need to be changed.
ASSERT(op.inst.hex == 0x4e800020);
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
js.skipInstructions--;
}
else
{
auto& cpu = m_system.GetCPU();
auto& power_pc = m_system.GetPowerPC();
if (IsDebuggingEnabled() && power_pc.GetBreakPoints().IsAddressBreakPoint(op.address) &&
!cpu.IsStepping())
if (i != 0)
{
gpr.Flush();
fpr.Flush();
// Gather pipe writes using a non-immediate address are discovered by profiling.
const u32 prev_address = m_code_buffer[i - 1].address;
bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
MOV(32, PPCSTATE(pc), Imm32(op.address));
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckAndHandleBreakPointsFromJIT, &power_pc);
ABI_PopRegistersAndAdjustStack({}, 0);
MOV(64, R(RSCRATCH), ImmPtr(cpu.GetStatePtr()));
CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
FixupBranch noBreakpoint = J_CC(CC_E);
Cleanup();
MOV(32, PPCSTATE(npc), Imm32(op.address));
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher_exit, Jump::Near);
SetJumpTarget(noBreakpoint);
}
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
{
// This instruction uses FPU - needs to add FP exception bailout
TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit
FixupBranch b1 = J_CC(CC_Z, Jump::Near);
SwitchToFarCode();
SetJumpTarget(b1);
// Gather pipe writes using an immediate address are explicitly tracked.
if (jo.optimizeGatherPipe &&
(js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
// If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens.
MOV(32, PPCSTATE(pc), Imm32(op.address));
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
WriteExceptionExit();
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionP(GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
gatherPipeIntCheck = true;
}
SwitchToNearCode();
js.firstFPInstructionFound = true;
// Gather pipe writes can generate an exception; add an exception check.
// TODO: This doesn't really match hardware; the CP interrupt is
// asynchronous.
if (gatherPipeIntCheck)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch extException = J_CC(CC_NZ, Jump::Near);
SwitchToFarCode();
SetJumpTarget(extException);
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = J_CC(CC_Z, Jump::Near);
MOV(64, R(RSCRATCH), ImmPtr(&m_system.GetProcessorInterface().m_interrupt_cause));
TEST(32, MatR(RSCRATCH),
Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = J_CC(CC_Z, Jump::Near);
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(op.address));
WriteExternalExceptionExit();
}
SwitchToNearCode();
SetJumpTarget(noCPInt);
SetJumpTarget(noExtIntEnable);
}
}
if (bJITRegisterCacheOff)
if (HandleFunctionHooking(op.address))
break;
if (op.skip)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// The only thing that currently sets op.skip is the BLR following optimization.
// If any non-branch instruction starts setting that too, this will need to be changed.
ASSERT(op.inst.hex == 0x4e800020);
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
}
else
{
// If we have an input register that is going to be used again, load it pre-emptively,
// even if the instruction doesn't strictly need it in a register, to avoid redundant
// loads later. Of course, don't do this if we're already out of registers.
// As a bit of a heuristic, make sure we have at least one register left over for the
// output, which needs to be bound in the actual instruction compilation.
// TODO: make this smarter in the case that we're actually register-starved, i.e.
// prioritize the more important registers.
gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
}
CompileInstruction(op);
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
{
// If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
// it.
FixupBranch memException;
ASSERT_MSG(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler),
"Fastmem loadstores shouldn't have exception handler fixups (PC={:x})!",
op.address);
if (!js.fastmemLoadStore && !js.fixupExceptionHandler)
auto& cpu = m_system.GetCPU();
auto& power_pc = m_system.GetPowerPC();
if (IsDebuggingEnabled() && power_pc.GetBreakPoints().IsAddressBreakPoint(op.address) &&
!cpu.IsStepping())
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
memException = J_CC(CC_NZ, Jump::Near);
gpr.Flush();
fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(op.address));
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckAndHandleBreakPointsFromJIT, &power_pc);
ABI_PopRegistersAndAdjustStack({}, 0);
MOV(64, R(RSCRATCH), ImmPtr(cpu.GetStatePtr()));
CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
FixupBranch noBreakpoint = J_CC(CC_E);
Cleanup();
MOV(32, PPCSTATE(npc), Imm32(op.address));
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher_exit, Jump::Near);
SetJumpTarget(noBreakpoint);
}
SwitchToFarCode();
if (!js.fastmemLoadStore)
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
{
m_exception_handler_at_loc[js.fastmemLoadStore] = nullptr;
SetJumpTarget(js.fixupExceptionHandler ? js.exceptionHandler : memException);
// This instruction uses FPU - needs to add FP exception bailout
TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit
FixupBranch b1 = J_CC(CC_Z, Jump::Near);
SwitchToFarCode();
SetJumpTarget(b1);
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
// If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens.
MOV(32, PPCSTATE(pc), Imm32(op.address));
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
WriteExceptionExit();
}
SwitchToNearCode();
js.firstFPInstructionFound = true;
}
if (bJITRegisterCacheOff)
{
gpr.Flush();
fpr.Flush();
}
else
{
m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
// If we have an input register that is going to be used again, load it pre-emptively,
// even if the instruction doesn't strictly need it in a register, to avoid redundant
// loads later. Of course, don't do this if we're already out of registers.
// As a bit of a heuristic, make sure we have at least one register left over for the
// output, which needs to be bound in the actual instruction compilation.
// TODO: make this smarter in the case that we're actually register-starved, i.e.
// prioritize the more important registers.
gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
}
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
CompileInstruction(op);
gpr.Revert();
fpr.Revert();
gpr.Flush();
fpr.Flush();
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
MOV(32, PPCSTATE(pc), Imm32(op.address));
WriteExceptionExit();
SwitchToNearCode();
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
{
// If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
// it.
FixupBranch memException;
ASSERT_MSG(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler),
"Fastmem loadstores shouldn't have exception handler fixups (PC={:x})!",
op.address);
if (!js.fastmemLoadStore && !js.fixupExceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
memException = J_CC(CC_NZ, Jump::Near);
}
SwitchToFarCode();
if (!js.fastmemLoadStore)
{
m_exception_handler_at_loc[js.fastmemLoadStore] = nullptr;
SetJumpTarget(js.fixupExceptionHandler ? js.exceptionHandler : memException);
}
else
{
m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
}
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();
gpr.Revert();
fpr.Revert();
gpr.Flush();
fpr.Flush();
MOV(32, PPCSTATE(pc), Imm32(op.address));
WriteExceptionExit();
SwitchToNearCode();
}
gpr.Commit();
fpr.Commit();
if (opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;
if (opinfo->flags & FL_USE_FPU)
++js.numFloatingPointInst;
}
gpr.Commit();
fpr.Commit();
// If we have a register that will never be used again, discard or flush it.
if (!bJITRegisterCacheOff)
{
gpr.Discard(op.gprDiscardable);
fpr.Discard(op.fprDiscardable);
}
gpr.Flush(~op.gprInUse & previous_gpr_in_use);
fpr.Flush(~op.fprInUse & previous_fpr_in_use);
previous_gpr_in_use = op.gprInUse;
previous_fpr_in_use = op.fprInUse;
if (opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;
if (opinfo->flags & FL_USE_FPU)
++js.numFloatingPointInst;
}
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
// If we have a register that will never be used again, discard or flush it.
if (!bJITRegisterCacheOff)
{
gpr.Discard(op.gprDiscardable);
fpr.Discard(op.fprDiscardable);
}
gpr.Flush(~op.gprInUse & previous_gpr_in_use);
fpr.Flush(~op.fprInUse & previous_fpr_in_use);
previous_gpr_in_use = op.gprInUse;
previous_fpr_in_use = op.fprInUse;
#if defined(_DEBUG) || defined(DEBUGFAST)
if (!gpr.SanityCheck() || !fpr.SanityCheck())
{
@@ -1187,8 +1196,6 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
NOTICE_LOG_FMT(DYNA_REC, "Unflushed register: {}", ppc_inst);
}
#endif
i += js.skipInstructions;
js.skipInstructions = 0;
}
if (code_block.m_broken)

@@ -1188,193 +1188,197 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
js.downcountAmount += opinfo->num_cycles;
js.isLastInstruction = i == (code_block.m_num_instructions - 1);
// Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
gpr.UpdateLastUsed(op.regsIn | op.regsOut);
BitSet32 fpr_used = op.fregsIn;
if (op.fregOut >= 0)
fpr_used[op.fregOut] = true;
fpr.UpdateLastUsed(fpr_used);
if (i != 0)
if (js.skipInstructions != 0)
{
// Gather pipe writes using a non-immediate address are discovered by profiling.
const u32 prev_address = m_code_buffer[i - 1].address;
bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
if (jo.optimizeGatherPipe &&
(js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
{
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
gpr.Lock(ARM64Reg::W30);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
ABI_CallFunction(&GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
ABI_PopRegisters(regs_in_use);
gpr.Unlock(ARM64Reg::W30);
gatherPipeIntCheck = true;
}
// Gather pipe writes can generate an exception; add an exception check.
// TODO: This doesn't really match hardware; the CP interrupt is
// asynchronous.
if (jo.optimizeGatherPipe && gatherPipeIntCheck)
{
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch no_ext_exception = TBZ(WA, MathUtil::IntLog2(EXCEPTION_EXTERNAL_INT));
FixupBranch exception = B();
SwitchToFarCode();
const u8* done_here = GetCodePtr();
FixupBranch exit = B();
SetJumpTarget(exception);
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
TBZ(WA, 15, done_here); // MSR.EE
LDR(IndexType::Unsigned, WA, XA,
MOVPage2R(XA, &m_system.GetProcessorInterface().m_interrupt_cause));
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH;
TST(WA, LogicalImm(cause_mask, GPRSize::B32));
B(CC_EQ, done_here);
gpr.Flush(FlushMode::MaintainState, WA);
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
WriteExceptionExit(js.compilerPC, true, true);
SwitchToNearCode();
SetJumpTarget(no_ext_exception);
SetJumpTarget(exit);
}
}
if (HandleFunctionHooking(op.address))
break;
if (op.skip)
{
if (IsDebuggingEnabled())
{
// The only thing that currently sets op.skip is the BLR following optimization.
// If any non-branch instruction starts setting that too, this will need to be changed.
ASSERT(op.inst.hex == 0x4e800020);
const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
const BitSet32 gpr_caller_save =
gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
gpr_caller_save, fpr.GetCallerSavedUsed());
}
js.skipInstructions--;
}
else
{
if (IsDebuggingEnabled() && !cpu.IsStepping() &&
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address))
// Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
gpr.UpdateLastUsed(op.regsIn | op.regsOut);
BitSet32 fpr_used = op.fregsIn;
if (op.fregOut >= 0)
fpr_used[op.fregOut] = true;
fpr.UpdateLastUsed(fpr_used);
if (i != 0)
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
// Gather pipe writes using a non-immediate address are discovered by profiling.
const u32 prev_address = m_code_buffer[i - 1].address;
bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
static_assert(PPCSTATE_OFF(pc) <= 252);
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
MOVI2R(DISPATCHER_PC, op.address);
STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
ABI_CallFunction(&PowerPC::CheckAndHandleBreakPointsFromJIT, &m_system.GetPowerPC());
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr()));
static_assert(Common::ToUnderlying(CPU::State::Running) == 0);
FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
Cleanup();
if (IsProfilingEnabled())
if (jo.optimizeGatherPipe &&
(js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
{
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, b->profile_data.get(),
js.downcountAmount);
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
gpr.Lock(ARM64Reg::W30);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
ABI_CallFunction(&GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
ABI_PopRegisters(regs_in_use);
gpr.Unlock(ARM64Reg::W30);
gatherPipeIntCheck = true;
}
DoDownCount();
B(dispatcher_exit);
SetJumpTarget(no_breakpoint);
}
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
{
FixupBranch b1;
// This instruction uses FPU - needs to add FP exception bailout
// Gather pipe writes can generate an exception; add an exception check.
// TODO: This doesn't really match hardware; the CP interrupt is
// asynchronous.
if (jo.optimizeGatherPipe && gatherPipeIntCheck)
{
auto WA = gpr.GetScopedReg();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
b1 = TBNZ(WA, 13); // Test FP enabled bit
ARM64Reg XA = EncodeRegTo64(WA);
FixupBranch far_addr = B();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch no_ext_exception = TBZ(WA, MathUtil::IntLog2(EXCEPTION_EXTERNAL_INT));
FixupBranch exception = B();
SwitchToFarCode();
SetJumpTarget(far_addr);
const u8* done_here = GetCodePtr();
FixupBranch exit = B();
SetJumpTarget(exception);
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
TBZ(WA, 15, done_here); // MSR.EE
LDR(IndexType::Unsigned, WA, XA,
MOVPage2R(XA, &m_system.GetProcessorInterface().m_interrupt_cause));
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH;
TST(WA, LogicalImm(cause_mask, GPRSize::B32));
B(CC_EQ, done_here);
gpr.Flush(FlushMode::MaintainState, WA);
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
WriteExceptionExit(js.compilerPC, true, true);
SwitchToNearCode();
SetJumpTarget(no_ext_exception);
SetJumpTarget(exit);
}
}
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
ORR(WA, WA, LogicalImm(EXCEPTION_FPU_UNAVAILABLE, GPRSize::B32));
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
if (HandleFunctionHooking(op.address))
break;
if (op.skip)
{
if (IsDebuggingEnabled())
{
// The only thing that currently sets op.skip is the BLR following optimization.
// If any non-branch instruction starts setting that too, this will need to be changed.
ASSERT(op.inst.hex == 0x4e800020);
const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
const BitSet32 gpr_caller_save =
gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
gpr_caller_save, fpr.GetCallerSavedUsed());
}
}
else
{
if (IsDebuggingEnabled() && !cpu.IsStepping() &&
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address))
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
static_assert(PPCSTATE_OFF(pc) <= 252);
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
MOVI2R(DISPATCHER_PC, op.address);
STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
ABI_CallFunction(&PowerPC::CheckAndHandleBreakPointsFromJIT, &m_system.GetPowerPC());
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr()));
static_assert(Common::ToUnderlying(CPU::State::Running) == 0);
FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
Cleanup();
if (IsProfilingEnabled())
{
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, b->profile_data.get(),
js.downcountAmount);
}
DoDownCount();
B(dispatcher_exit);
SetJumpTarget(no_breakpoint);
}
WriteExceptionExit(js.compilerPC, false, true);
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
{
FixupBranch b1;
// This instruction uses FPU - needs to add FP exception bailout
{
auto WA = gpr.GetScopedReg();
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
b1 = TBNZ(WA, 13); // Test FP enabled bit
SwitchToNearCode();
FixupBranch far_addr = B();
SwitchToFarCode();
SetJumpTarget(far_addr);
SetJumpTarget(b1);
gpr.Flush(FlushMode::MaintainState, WA);
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
js.firstFPInstructionFound = true;
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
ORR(WA, WA, LogicalImm(EXCEPTION_FPU_UNAVAILABLE, GPRSize::B32));
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
}
WriteExceptionExit(js.compilerPC, false, true);
SwitchToNearCode();
SetJumpTarget(b1);
js.firstFPInstructionFound = true;
}
if (bJITRegisterCacheOff)
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
}
CompileInstruction(op);
if (opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;
if (opinfo->flags & FL_USE_FPU)
++js.numFloatingPointInst;
}
if (bJITRegisterCacheOff)
{
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
}
CompileInstruction(op);
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
FlushCarry();
// If we have a register that will never be used again, discard or flush it.
if (!bJITRegisterCacheOff)
{
gpr.DiscardRegisters(op.gprDiscardable);
fpr.DiscardRegisters(op.fprDiscardable);
gpr.DiscardCRRegisters(op.crDiscardable);
}
gpr.StoreRegisters(~op.gprInUse & previous_gpr_in_use);
fpr.StoreRegisters(~op.fprInUse & previous_fpr_in_use);
gpr.StoreCRRegisters(~op.crInUse & previous_cr_in_use);
previous_gpr_in_use = op.gprInUse;
previous_fpr_in_use = op.fprInUse;
previous_cr_in_use = op.crInUse;
if (opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst;
if (opinfo->flags & FL_USE_FPU)
++js.numFloatingPointInst;
}
i += js.skipInstructions;
js.skipInstructions = 0;
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
FlushCarry();
// If we have a register that will never be used again, discard or flush it.
if (!bJITRegisterCacheOff)
{
gpr.DiscardRegisters(op.gprDiscardable);
fpr.DiscardRegisters(op.fprDiscardable);
gpr.DiscardCRRegisters(op.crDiscardable);
}
gpr.StoreRegisters(~op.gprInUse & previous_gpr_in_use);
fpr.StoreRegisters(~op.fprInUse & previous_fpr_in_use);
gpr.StoreCRRegisters(~op.crInUse & previous_cr_in_use);
previous_gpr_in_use = op.gprInUse;
previous_fpr_in_use = op.fprInUse;
previous_cr_in_use = op.crInUse;
}
if (code_block.m_broken)