mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-09 23:59:27 +01:00
Jit: Further improve flushing for skipped instructions
Now we no longer have to wait until we've compiled the next instruction after a skipped instruction before we can flush registers. The diff for this commit is easier to read with the "Ignore whitespace" option enabled.
This commit is contained in:
parent
c04d8415b3
commit
7dddc39068
@ -979,207 +979,216 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i != 0)
|
if (js.skipInstructions != 0)
|
||||||
{
|
{
|
||||||
// Gather pipe writes using a non-immediate address are discovered by profiling.
|
js.skipInstructions--;
|
||||||
const u32 prev_address = m_code_buffer[i - 1].address;
|
|
||||||
bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
|
|
||||||
|
|
||||||
// Gather pipe writes using an immediate address are explicitly tracked.
|
|
||||||
if (jo.optimizeGatherPipe &&
|
|
||||||
(js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
|
|
||||||
{
|
|
||||||
js.fifoBytesSinceCheck = 0;
|
|
||||||
js.mustCheckFifo = false;
|
|
||||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
|
||||||
ABI_CallFunctionP(GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
|
|
||||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
|
||||||
gatherPipeIntCheck = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Gather pipe writes can generate an exception; add an exception check.
|
|
||||||
// TODO: This doesn't really match hardware; the CP interrupt is
|
|
||||||
// asynchronous.
|
|
||||||
if (gatherPipeIntCheck)
|
|
||||||
{
|
|
||||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
|
|
||||||
FixupBranch extException = J_CC(CC_NZ, Jump::Near);
|
|
||||||
|
|
||||||
SwitchToFarCode();
|
|
||||||
SetJumpTarget(extException);
|
|
||||||
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
|
|
||||||
FixupBranch noExtIntEnable = J_CC(CC_Z, Jump::Near);
|
|
||||||
MOV(64, R(RSCRATCH), ImmPtr(&m_system.GetProcessorInterface().m_interrupt_cause));
|
|
||||||
TEST(32, MatR(RSCRATCH),
|
|
||||||
Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN |
|
|
||||||
ProcessorInterface::INT_CAUSE_PE_FINISH));
|
|
||||||
FixupBranch noCPInt = J_CC(CC_Z, Jump::Near);
|
|
||||||
|
|
||||||
{
|
|
||||||
RCForkGuard gpr_guard = gpr.Fork();
|
|
||||||
RCForkGuard fpr_guard = fpr.Fork();
|
|
||||||
|
|
||||||
gpr.Flush();
|
|
||||||
fpr.Flush();
|
|
||||||
|
|
||||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
|
||||||
WriteExternalExceptionExit();
|
|
||||||
}
|
|
||||||
SwitchToNearCode();
|
|
||||||
SetJumpTarget(noCPInt);
|
|
||||||
SetJumpTarget(noExtIntEnable);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (HandleFunctionHooking(op.address))
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (op.skip)
|
|
||||||
{
|
|
||||||
if (IsDebuggingEnabled())
|
|
||||||
{
|
|
||||||
// The only thing that currently sets op.skip is the BLR following optimization.
|
|
||||||
// If any non-branch instruction starts setting that too, this will need to be changed.
|
|
||||||
ASSERT(op.inst.hex == 0x4e800020);
|
|
||||||
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
|
|
||||||
CallerSavedRegistersInUse());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto& cpu = m_system.GetCPU();
|
if (i != 0)
|
||||||
auto& power_pc = m_system.GetPowerPC();
|
|
||||||
if (IsDebuggingEnabled() && power_pc.GetBreakPoints().IsAddressBreakPoint(op.address) &&
|
|
||||||
!cpu.IsStepping())
|
|
||||||
{
|
{
|
||||||
gpr.Flush();
|
// Gather pipe writes using a non-immediate address are discovered by profiling.
|
||||||
fpr.Flush();
|
const u32 prev_address = m_code_buffer[i - 1].address;
|
||||||
|
bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
|
||||||
|
|
||||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
// Gather pipe writes using an immediate address are explicitly tracked.
|
||||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
if (jo.optimizeGatherPipe &&
|
||||||
ABI_CallFunctionP(PowerPC::CheckAndHandleBreakPointsFromJIT, &power_pc);
|
(js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
|
||||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
|
||||||
MOV(64, R(RSCRATCH), ImmPtr(cpu.GetStatePtr()));
|
|
||||||
CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
|
|
||||||
FixupBranch noBreakpoint = J_CC(CC_E);
|
|
||||||
|
|
||||||
Cleanup();
|
|
||||||
MOV(32, PPCSTATE(npc), Imm32(op.address));
|
|
||||||
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
|
||||||
JMP(asm_routines.dispatcher_exit, Jump::Near);
|
|
||||||
|
|
||||||
SetJumpTarget(noBreakpoint);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
|
|
||||||
{
|
|
||||||
// This instruction uses FPU - needs to add FP exception bailout
|
|
||||||
TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit
|
|
||||||
FixupBranch b1 = J_CC(CC_Z, Jump::Near);
|
|
||||||
|
|
||||||
SwitchToFarCode();
|
|
||||||
SetJumpTarget(b1);
|
|
||||||
{
|
{
|
||||||
RCForkGuard gpr_guard = gpr.Fork();
|
js.fifoBytesSinceCheck = 0;
|
||||||
RCForkGuard fpr_guard = fpr.Fork();
|
js.mustCheckFifo = false;
|
||||||
|
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||||
gpr.Flush();
|
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||||
fpr.Flush();
|
ABI_CallFunctionP(GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
|
||||||
|
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||||
// If a FPU exception occurs, the exception handler will read
|
gatherPipeIntCheck = true;
|
||||||
// from PC. Update PC with the latest value in case that happens.
|
|
||||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
|
||||||
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
|
|
||||||
WriteExceptionExit();
|
|
||||||
}
|
}
|
||||||
SwitchToNearCode();
|
|
||||||
|
|
||||||
js.firstFPInstructionFound = true;
|
// Gather pipe writes can generate an exception; add an exception check.
|
||||||
|
// TODO: This doesn't really match hardware; the CP interrupt is
|
||||||
|
// asynchronous.
|
||||||
|
if (gatherPipeIntCheck)
|
||||||
|
{
|
||||||
|
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
|
||||||
|
FixupBranch extException = J_CC(CC_NZ, Jump::Near);
|
||||||
|
|
||||||
|
SwitchToFarCode();
|
||||||
|
SetJumpTarget(extException);
|
||||||
|
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
|
||||||
|
FixupBranch noExtIntEnable = J_CC(CC_Z, Jump::Near);
|
||||||
|
MOV(64, R(RSCRATCH), ImmPtr(&m_system.GetProcessorInterface().m_interrupt_cause));
|
||||||
|
TEST(32, MatR(RSCRATCH),
|
||||||
|
Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN |
|
||||||
|
ProcessorInterface::INT_CAUSE_PE_FINISH));
|
||||||
|
FixupBranch noCPInt = J_CC(CC_Z, Jump::Near);
|
||||||
|
|
||||||
|
{
|
||||||
|
RCForkGuard gpr_guard = gpr.Fork();
|
||||||
|
RCForkGuard fpr_guard = fpr.Fork();
|
||||||
|
|
||||||
|
gpr.Flush();
|
||||||
|
fpr.Flush();
|
||||||
|
|
||||||
|
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||||
|
WriteExternalExceptionExit();
|
||||||
|
}
|
||||||
|
SwitchToNearCode();
|
||||||
|
SetJumpTarget(noCPInt);
|
||||||
|
SetJumpTarget(noExtIntEnable);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bJITRegisterCacheOff)
|
if (HandleFunctionHooking(op.address))
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (op.skip)
|
||||||
{
|
{
|
||||||
gpr.Flush();
|
if (IsDebuggingEnabled())
|
||||||
fpr.Flush();
|
{
|
||||||
|
// The only thing that currently sets op.skip is the BLR following optimization.
|
||||||
|
// If any non-branch instruction starts setting that too, this will need to be changed.
|
||||||
|
ASSERT(op.inst.hex == 0x4e800020);
|
||||||
|
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
|
||||||
|
CallerSavedRegistersInUse());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// If we have an input register that is going to be used again, load it pre-emptively,
|
auto& cpu = m_system.GetCPU();
|
||||||
// even if the instruction doesn't strictly need it in a register, to avoid redundant
|
auto& power_pc = m_system.GetPowerPC();
|
||||||
// loads later. Of course, don't do this if we're already out of registers.
|
if (IsDebuggingEnabled() && power_pc.GetBreakPoints().IsAddressBreakPoint(op.address) &&
|
||||||
// As a bit of a heuristic, make sure we have at least one register left over for the
|
!cpu.IsStepping())
|
||||||
// output, which needs to be bound in the actual instruction compilation.
|
|
||||||
// TODO: make this smarter in the case that we're actually register-starved, i.e.
|
|
||||||
// prioritize the more important registers.
|
|
||||||
gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
|
|
||||||
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
|
|
||||||
}
|
|
||||||
|
|
||||||
CompileInstruction(op);
|
|
||||||
|
|
||||||
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
|
||||||
|
|
||||||
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
|
|
||||||
{
|
|
||||||
// If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
|
|
||||||
// it.
|
|
||||||
FixupBranch memException;
|
|
||||||
ASSERT_MSG(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler),
|
|
||||||
"Fastmem loadstores shouldn't have exception handler fixups (PC={:x})!",
|
|
||||||
op.address);
|
|
||||||
if (!js.fastmemLoadStore && !js.fixupExceptionHandler)
|
|
||||||
{
|
{
|
||||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
gpr.Flush();
|
||||||
memException = J_CC(CC_NZ, Jump::Near);
|
fpr.Flush();
|
||||||
|
|
||||||
|
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||||
|
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||||
|
ABI_CallFunctionP(PowerPC::CheckAndHandleBreakPointsFromJIT, &power_pc);
|
||||||
|
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||||
|
MOV(64, R(RSCRATCH), ImmPtr(cpu.GetStatePtr()));
|
||||||
|
CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
|
||||||
|
FixupBranch noBreakpoint = J_CC(CC_E);
|
||||||
|
|
||||||
|
Cleanup();
|
||||||
|
MOV(32, PPCSTATE(npc), Imm32(op.address));
|
||||||
|
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
||||||
|
JMP(asm_routines.dispatcher_exit, Jump::Near);
|
||||||
|
|
||||||
|
SetJumpTarget(noBreakpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
SwitchToFarCode();
|
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
|
||||||
if (!js.fastmemLoadStore)
|
|
||||||
{
|
{
|
||||||
m_exception_handler_at_loc[js.fastmemLoadStore] = nullptr;
|
// This instruction uses FPU - needs to add FP exception bailout
|
||||||
SetJumpTarget(js.fixupExceptionHandler ? js.exceptionHandler : memException);
|
TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit
|
||||||
|
FixupBranch b1 = J_CC(CC_Z, Jump::Near);
|
||||||
|
|
||||||
|
SwitchToFarCode();
|
||||||
|
SetJumpTarget(b1);
|
||||||
|
{
|
||||||
|
RCForkGuard gpr_guard = gpr.Fork();
|
||||||
|
RCForkGuard fpr_guard = fpr.Fork();
|
||||||
|
|
||||||
|
gpr.Flush();
|
||||||
|
fpr.Flush();
|
||||||
|
|
||||||
|
// If a FPU exception occurs, the exception handler will read
|
||||||
|
// from PC. Update PC with the latest value in case that happens.
|
||||||
|
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||||
|
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
|
||||||
|
WriteExceptionExit();
|
||||||
|
}
|
||||||
|
SwitchToNearCode();
|
||||||
|
|
||||||
|
js.firstFPInstructionFound = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bJITRegisterCacheOff)
|
||||||
|
{
|
||||||
|
gpr.Flush();
|
||||||
|
fpr.Flush();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
|
// If we have an input register that is going to be used again, load it pre-emptively,
|
||||||
|
// even if the instruction doesn't strictly need it in a register, to avoid redundant
|
||||||
|
// loads later. Of course, don't do this if we're already out of registers.
|
||||||
|
// As a bit of a heuristic, make sure we have at least one register left over for the
|
||||||
|
// output, which needs to be bound in the actual instruction compilation.
|
||||||
|
// TODO: make this smarter in the case that we're actually register-starved, i.e.
|
||||||
|
// prioritize the more important registers.
|
||||||
|
gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
|
||||||
|
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
|
||||||
}
|
}
|
||||||
|
|
||||||
RCForkGuard gpr_guard = gpr.Fork();
|
CompileInstruction(op);
|
||||||
RCForkGuard fpr_guard = fpr.Fork();
|
|
||||||
|
|
||||||
gpr.Revert();
|
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
||||||
fpr.Revert();
|
|
||||||
gpr.Flush();
|
|
||||||
fpr.Flush();
|
|
||||||
|
|
||||||
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
|
||||||
WriteExceptionExit();
|
{
|
||||||
SwitchToNearCode();
|
// If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
|
||||||
|
// it.
|
||||||
|
FixupBranch memException;
|
||||||
|
ASSERT_MSG(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler),
|
||||||
|
"Fastmem loadstores shouldn't have exception handler fixups (PC={:x})!",
|
||||||
|
op.address);
|
||||||
|
if (!js.fastmemLoadStore && !js.fixupExceptionHandler)
|
||||||
|
{
|
||||||
|
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
||||||
|
memException = J_CC(CC_NZ, Jump::Near);
|
||||||
|
}
|
||||||
|
|
||||||
|
SwitchToFarCode();
|
||||||
|
if (!js.fastmemLoadStore)
|
||||||
|
{
|
||||||
|
m_exception_handler_at_loc[js.fastmemLoadStore] = nullptr;
|
||||||
|
SetJumpTarget(js.fixupExceptionHandler ? js.exceptionHandler : memException);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
|
||||||
|
}
|
||||||
|
|
||||||
|
RCForkGuard gpr_guard = gpr.Fork();
|
||||||
|
RCForkGuard fpr_guard = fpr.Fork();
|
||||||
|
|
||||||
|
gpr.Revert();
|
||||||
|
fpr.Revert();
|
||||||
|
gpr.Flush();
|
||||||
|
fpr.Flush();
|
||||||
|
|
||||||
|
MOV(32, PPCSTATE(pc), Imm32(op.address));
|
||||||
|
WriteExceptionExit();
|
||||||
|
SwitchToNearCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
gpr.Commit();
|
||||||
|
fpr.Commit();
|
||||||
|
|
||||||
|
if (opinfo->flags & FL_LOADSTORE)
|
||||||
|
++js.numLoadStoreInst;
|
||||||
|
|
||||||
|
if (opinfo->flags & FL_USE_FPU)
|
||||||
|
++js.numFloatingPointInst;
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Commit();
|
|
||||||
fpr.Commit();
|
|
||||||
|
|
||||||
// If we have a register that will never be used again, discard or flush it.
|
|
||||||
if (!bJITRegisterCacheOff)
|
|
||||||
{
|
|
||||||
gpr.Discard(op.gprDiscardable);
|
|
||||||
fpr.Discard(op.fprDiscardable);
|
|
||||||
}
|
|
||||||
gpr.Flush(~op.gprInUse & previous_gpr_in_use);
|
|
||||||
fpr.Flush(~op.fprInUse & previous_fpr_in_use);
|
|
||||||
|
|
||||||
previous_gpr_in_use = op.gprInUse;
|
|
||||||
previous_fpr_in_use = op.fprInUse;
|
|
||||||
|
|
||||||
if (opinfo->flags & FL_LOADSTORE)
|
|
||||||
++js.numLoadStoreInst;
|
|
||||||
|
|
||||||
if (opinfo->flags & FL_USE_FPU)
|
|
||||||
++js.numFloatingPointInst;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
||||||
|
|
||||||
|
// If we have a register that will never be used again, discard or flush it.
|
||||||
|
if (!bJITRegisterCacheOff)
|
||||||
|
{
|
||||||
|
gpr.Discard(op.gprDiscardable);
|
||||||
|
fpr.Discard(op.fprDiscardable);
|
||||||
|
}
|
||||||
|
gpr.Flush(~op.gprInUse & previous_gpr_in_use);
|
||||||
|
fpr.Flush(~op.fprInUse & previous_fpr_in_use);
|
||||||
|
|
||||||
|
previous_gpr_in_use = op.gprInUse;
|
||||||
|
previous_fpr_in_use = op.fprInUse;
|
||||||
|
|
||||||
#if defined(_DEBUG) || defined(DEBUGFAST)
|
#if defined(_DEBUG) || defined(DEBUGFAST)
|
||||||
if (!gpr.SanityCheck() || !fpr.SanityCheck())
|
if (!gpr.SanityCheck() || !fpr.SanityCheck())
|
||||||
{
|
{
|
||||||
@ -1187,8 +1196,6 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||||||
NOTICE_LOG_FMT(DYNA_REC, "Unflushed register: {}", ppc_inst);
|
NOTICE_LOG_FMT(DYNA_REC, "Unflushed register: {}", ppc_inst);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
i += js.skipInstructions;
|
|
||||||
js.skipInstructions = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code_block.m_broken)
|
if (code_block.m_broken)
|
||||||
|
@ -1188,193 +1188,197 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
|||||||
js.downcountAmount += opinfo->num_cycles;
|
js.downcountAmount += opinfo->num_cycles;
|
||||||
js.isLastInstruction = i == (code_block.m_num_instructions - 1);
|
js.isLastInstruction = i == (code_block.m_num_instructions - 1);
|
||||||
|
|
||||||
// Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
|
if (js.skipInstructions != 0)
|
||||||
if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
|
|
||||||
gpr.UpdateLastUsed(op.regsIn | op.regsOut);
|
|
||||||
|
|
||||||
BitSet32 fpr_used = op.fregsIn;
|
|
||||||
if (op.fregOut >= 0)
|
|
||||||
fpr_used[op.fregOut] = true;
|
|
||||||
fpr.UpdateLastUsed(fpr_used);
|
|
||||||
|
|
||||||
if (i != 0)
|
|
||||||
{
|
{
|
||||||
// Gather pipe writes using a non-immediate address are discovered by profiling.
|
js.skipInstructions--;
|
||||||
const u32 prev_address = m_code_buffer[i - 1].address;
|
|
||||||
bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
|
|
||||||
|
|
||||||
if (jo.optimizeGatherPipe &&
|
|
||||||
(js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
|
|
||||||
{
|
|
||||||
js.fifoBytesSinceCheck = 0;
|
|
||||||
js.mustCheckFifo = false;
|
|
||||||
|
|
||||||
gpr.Lock(ARM64Reg::W30);
|
|
||||||
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
|
||||||
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
|
||||||
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
|
|
||||||
|
|
||||||
ABI_PushRegisters(regs_in_use);
|
|
||||||
m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
|
|
||||||
ABI_CallFunction(&GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
|
|
||||||
m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
|
|
||||||
ABI_PopRegisters(regs_in_use);
|
|
||||||
|
|
||||||
gpr.Unlock(ARM64Reg::W30);
|
|
||||||
gatherPipeIntCheck = true;
|
|
||||||
}
|
|
||||||
// Gather pipe writes can generate an exception; add an exception check.
|
|
||||||
// TODO: This doesn't really match hardware; the CP interrupt is
|
|
||||||
// asynchronous.
|
|
||||||
if (jo.optimizeGatherPipe && gatherPipeIntCheck)
|
|
||||||
{
|
|
||||||
auto WA = gpr.GetScopedReg();
|
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
|
||||||
|
|
||||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
|
|
||||||
FixupBranch no_ext_exception = TBZ(WA, MathUtil::IntLog2(EXCEPTION_EXTERNAL_INT));
|
|
||||||
FixupBranch exception = B();
|
|
||||||
SwitchToFarCode();
|
|
||||||
const u8* done_here = GetCodePtr();
|
|
||||||
FixupBranch exit = B();
|
|
||||||
SetJumpTarget(exception);
|
|
||||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
|
|
||||||
TBZ(WA, 15, done_here); // MSR.EE
|
|
||||||
LDR(IndexType::Unsigned, WA, XA,
|
|
||||||
MOVPage2R(XA, &m_system.GetProcessorInterface().m_interrupt_cause));
|
|
||||||
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
|
|
||||||
ProcessorInterface::INT_CAUSE_PE_TOKEN |
|
|
||||||
ProcessorInterface::INT_CAUSE_PE_FINISH;
|
|
||||||
TST(WA, LogicalImm(cause_mask, GPRSize::B32));
|
|
||||||
B(CC_EQ, done_here);
|
|
||||||
|
|
||||||
gpr.Flush(FlushMode::MaintainState, WA);
|
|
||||||
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
|
|
||||||
WriteExceptionExit(js.compilerPC, true, true);
|
|
||||||
SwitchToNearCode();
|
|
||||||
SetJumpTarget(no_ext_exception);
|
|
||||||
SetJumpTarget(exit);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (HandleFunctionHooking(op.address))
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (op.skip)
|
|
||||||
{
|
|
||||||
if (IsDebuggingEnabled())
|
|
||||||
{
|
|
||||||
// The only thing that currently sets op.skip is the BLR following optimization.
|
|
||||||
// If any non-branch instruction starts setting that too, this will need to be changed.
|
|
||||||
ASSERT(op.inst.hex == 0x4e800020);
|
|
||||||
const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
|
|
||||||
const BitSet32 gpr_caller_save =
|
|
||||||
gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
|
|
||||||
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
|
|
||||||
gpr_caller_save, fpr.GetCallerSavedUsed());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (IsDebuggingEnabled() && !cpu.IsStepping() &&
|
// Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
|
||||||
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address))
|
if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
|
||||||
|
gpr.UpdateLastUsed(op.regsIn | op.regsOut);
|
||||||
|
|
||||||
|
BitSet32 fpr_used = op.fregsIn;
|
||||||
|
if (op.fregOut >= 0)
|
||||||
|
fpr_used[op.fregOut] = true;
|
||||||
|
fpr.UpdateLastUsed(fpr_used);
|
||||||
|
|
||||||
|
if (i != 0)
|
||||||
{
|
{
|
||||||
FlushCarry();
|
// Gather pipe writes using a non-immediate address are discovered by profiling.
|
||||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
const u32 prev_address = m_code_buffer[i - 1].address;
|
||||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
|
||||||
|
|
||||||
static_assert(PPCSTATE_OFF(pc) <= 252);
|
if (jo.optimizeGatherPipe &&
|
||||||
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
|
(js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
|
||||||
|
|
||||||
MOVI2R(DISPATCHER_PC, op.address);
|
|
||||||
STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
|
||||||
ABI_CallFunction(&PowerPC::CheckAndHandleBreakPointsFromJIT, &m_system.GetPowerPC());
|
|
||||||
|
|
||||||
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
|
|
||||||
MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr()));
|
|
||||||
static_assert(Common::ToUnderlying(CPU::State::Running) == 0);
|
|
||||||
FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
|
|
||||||
|
|
||||||
Cleanup();
|
|
||||||
if (IsProfilingEnabled())
|
|
||||||
{
|
{
|
||||||
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, b->profile_data.get(),
|
js.fifoBytesSinceCheck = 0;
|
||||||
js.downcountAmount);
|
js.mustCheckFifo = false;
|
||||||
|
|
||||||
|
gpr.Lock(ARM64Reg::W30);
|
||||||
|
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
|
||||||
|
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
|
||||||
|
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
|
||||||
|
|
||||||
|
ABI_PushRegisters(regs_in_use);
|
||||||
|
m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
|
||||||
|
ABI_CallFunction(&GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
|
||||||
|
m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
|
||||||
|
ABI_PopRegisters(regs_in_use);
|
||||||
|
|
||||||
|
gpr.Unlock(ARM64Reg::W30);
|
||||||
|
gatherPipeIntCheck = true;
|
||||||
}
|
}
|
||||||
DoDownCount();
|
// Gather pipe writes can generate an exception; add an exception check.
|
||||||
B(dispatcher_exit);
|
// TODO: This doesn't really match hardware; the CP interrupt is
|
||||||
|
// asynchronous.
|
||||||
SetJumpTarget(no_breakpoint);
|
if (jo.optimizeGatherPipe && gatherPipeIntCheck)
|
||||||
}
|
|
||||||
|
|
||||||
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
|
|
||||||
{
|
|
||||||
FixupBranch b1;
|
|
||||||
// This instruction uses FPU - needs to add FP exception bailout
|
|
||||||
{
|
{
|
||||||
auto WA = gpr.GetScopedReg();
|
auto WA = gpr.GetScopedReg();
|
||||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
|
ARM64Reg XA = EncodeRegTo64(WA);
|
||||||
b1 = TBNZ(WA, 13); // Test FP enabled bit
|
|
||||||
|
|
||||||
FixupBranch far_addr = B();
|
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
|
||||||
|
FixupBranch no_ext_exception = TBZ(WA, MathUtil::IntLog2(EXCEPTION_EXTERNAL_INT));
|
||||||
|
FixupBranch exception = B();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(far_addr);
|
const u8* done_here = GetCodePtr();
|
||||||
|
FixupBranch exit = B();
|
||||||
|
SetJumpTarget(exception);
|
||||||
|
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
|
TBZ(WA, 15, done_here); // MSR.EE
|
||||||
|
LDR(IndexType::Unsigned, WA, XA,
|
||||||
|
MOVPage2R(XA, &m_system.GetProcessorInterface().m_interrupt_cause));
|
||||||
|
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
|
||||||
|
ProcessorInterface::INT_CAUSE_PE_TOKEN |
|
||||||
|
ProcessorInterface::INT_CAUSE_PE_FINISH;
|
||||||
|
TST(WA, LogicalImm(cause_mask, GPRSize::B32));
|
||||||
|
B(CC_EQ, done_here);
|
||||||
|
|
||||||
gpr.Flush(FlushMode::MaintainState, WA);
|
gpr.Flush(FlushMode::MaintainState, WA);
|
||||||
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
|
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
|
||||||
|
WriteExceptionExit(js.compilerPC, true, true);
|
||||||
|
SwitchToNearCode();
|
||||||
|
SetJumpTarget(no_ext_exception);
|
||||||
|
SetJumpTarget(exit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
|
if (HandleFunctionHooking(op.address))
|
||||||
ORR(WA, WA, LogicalImm(EXCEPTION_FPU_UNAVAILABLE, GPRSize::B32));
|
break;
|
||||||
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
|
|
||||||
|
if (op.skip)
|
||||||
|
{
|
||||||
|
if (IsDebuggingEnabled())
|
||||||
|
{
|
||||||
|
// The only thing that currently sets op.skip is the BLR following optimization.
|
||||||
|
// If any non-branch instruction starts setting that too, this will need to be changed.
|
||||||
|
ASSERT(op.inst.hex == 0x4e800020);
|
||||||
|
const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
|
||||||
|
const BitSet32 gpr_caller_save =
|
||||||
|
gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
|
||||||
|
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
|
||||||
|
gpr_caller_save, fpr.GetCallerSavedUsed());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (IsDebuggingEnabled() && !cpu.IsStepping() &&
|
||||||
|
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address))
|
||||||
|
{
|
||||||
|
FlushCarry();
|
||||||
|
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||||
|
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||||
|
|
||||||
|
static_assert(PPCSTATE_OFF(pc) <= 252);
|
||||||
|
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
|
||||||
|
|
||||||
|
MOVI2R(DISPATCHER_PC, op.address);
|
||||||
|
STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
||||||
|
ABI_CallFunction(&PowerPC::CheckAndHandleBreakPointsFromJIT, &m_system.GetPowerPC());
|
||||||
|
|
||||||
|
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
|
||||||
|
MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr()));
|
||||||
|
static_assert(Common::ToUnderlying(CPU::State::Running) == 0);
|
||||||
|
FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
|
||||||
|
|
||||||
|
Cleanup();
|
||||||
|
if (IsProfilingEnabled())
|
||||||
|
{
|
||||||
|
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, b->profile_data.get(),
|
||||||
|
js.downcountAmount);
|
||||||
|
}
|
||||||
|
DoDownCount();
|
||||||
|
B(dispatcher_exit);
|
||||||
|
|
||||||
|
SetJumpTarget(no_breakpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
WriteExceptionExit(js.compilerPC, false, true);
|
if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
|
||||||
|
{
|
||||||
|
FixupBranch b1;
|
||||||
|
// This instruction uses FPU - needs to add FP exception bailout
|
||||||
|
{
|
||||||
|
auto WA = gpr.GetScopedReg();
|
||||||
|
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
|
b1 = TBNZ(WA, 13); // Test FP enabled bit
|
||||||
|
|
||||||
SwitchToNearCode();
|
FixupBranch far_addr = B();
|
||||||
|
SwitchToFarCode();
|
||||||
|
SetJumpTarget(far_addr);
|
||||||
|
|
||||||
SetJumpTarget(b1);
|
gpr.Flush(FlushMode::MaintainState, WA);
|
||||||
|
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
|
||||||
|
|
||||||
js.firstFPInstructionFound = true;
|
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
|
||||||
|
ORR(WA, WA, LogicalImm(EXCEPTION_FPU_UNAVAILABLE, GPRSize::B32));
|
||||||
|
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
|
||||||
|
}
|
||||||
|
|
||||||
|
WriteExceptionExit(js.compilerPC, false, true);
|
||||||
|
|
||||||
|
SwitchToNearCode();
|
||||||
|
|
||||||
|
SetJumpTarget(b1);
|
||||||
|
|
||||||
|
js.firstFPInstructionFound = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bJITRegisterCacheOff)
|
||||||
|
{
|
||||||
|
FlushCarry();
|
||||||
|
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||||
|
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||||
|
}
|
||||||
|
|
||||||
|
CompileInstruction(op);
|
||||||
|
|
||||||
|
if (opinfo->flags & FL_LOADSTORE)
|
||||||
|
++js.numLoadStoreInst;
|
||||||
|
|
||||||
|
if (opinfo->flags & FL_USE_FPU)
|
||||||
|
++js.numFloatingPointInst;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bJITRegisterCacheOff)
|
|
||||||
{
|
|
||||||
FlushCarry();
|
|
||||||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
|
||||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
|
||||||
}
|
|
||||||
|
|
||||||
CompileInstruction(op);
|
|
||||||
|
|
||||||
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
|
||||||
|
|
||||||
if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
|
|
||||||
FlushCarry();
|
|
||||||
|
|
||||||
// If we have a register that will never be used again, discard or flush it.
|
|
||||||
if (!bJITRegisterCacheOff)
|
|
||||||
{
|
|
||||||
gpr.DiscardRegisters(op.gprDiscardable);
|
|
||||||
fpr.DiscardRegisters(op.fprDiscardable);
|
|
||||||
gpr.DiscardCRRegisters(op.crDiscardable);
|
|
||||||
}
|
|
||||||
gpr.StoreRegisters(~op.gprInUse & previous_gpr_in_use);
|
|
||||||
fpr.StoreRegisters(~op.fprInUse & previous_fpr_in_use);
|
|
||||||
gpr.StoreCRRegisters(~op.crInUse & previous_cr_in_use);
|
|
||||||
|
|
||||||
previous_gpr_in_use = op.gprInUse;
|
|
||||||
previous_fpr_in_use = op.fprInUse;
|
|
||||||
previous_cr_in_use = op.crInUse;
|
|
||||||
|
|
||||||
if (opinfo->flags & FL_LOADSTORE)
|
|
||||||
++js.numLoadStoreInst;
|
|
||||||
|
|
||||||
if (opinfo->flags & FL_USE_FPU)
|
|
||||||
++js.numFloatingPointInst;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
i += js.skipInstructions;
|
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
||||||
js.skipInstructions = 0;
|
|
||||||
|
if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
|
||||||
|
FlushCarry();
|
||||||
|
|
||||||
|
// If we have a register that will never be used again, discard or flush it.
|
||||||
|
if (!bJITRegisterCacheOff)
|
||||||
|
{
|
||||||
|
gpr.DiscardRegisters(op.gprDiscardable);
|
||||||
|
fpr.DiscardRegisters(op.fprDiscardable);
|
||||||
|
gpr.DiscardCRRegisters(op.crDiscardable);
|
||||||
|
}
|
||||||
|
gpr.StoreRegisters(~op.gprInUse & previous_gpr_in_use);
|
||||||
|
fpr.StoreRegisters(~op.fprInUse & previous_fpr_in_use);
|
||||||
|
gpr.StoreCRRegisters(~op.crInUse & previous_cr_in_use);
|
||||||
|
|
||||||
|
previous_gpr_in_use = op.gprInUse;
|
||||||
|
previous_fpr_in_use = op.fprInUse;
|
||||||
|
previous_cr_in_use = op.crInUse;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code_block.m_broken)
|
if (code_block.m_broken)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user