JitArm64: gather-pipe fast path and external-interrupt checks.

* Cleanup() is emitted by each exit helper touched below and calls
  GPFifo::FastCheckGatherPipe when the block has written FIFO bytes.
* WriteExternalExceptionExit() stores pc/npc, calls
  PowerPC::CheckExternalExceptions, copies npc back into pc and jumps to the
  dispatcher.
* DoJit moves the FastCheckGatherPipe call into far code and follows it with
  an inline external-interrupt check; instructions whose address is listed in
  js.fifoWriteAddresses get the same check on their own.
* SafeStoreFromReg writes immediate-address gather-pipe stores directly into
  GPFifo::m_gatherPipe and bumps GPFifo::m_gatherPipeCount.
* HandleFault now validates the faulting PC before the access address.

NOTE(review): TBZ(..., 3) is labelled EXCEPTION_EXTERNAL_INT; the enum is not
part of this patch, so confirm the bit index against its definition.
NOTE(review): the gather-pipe path in SafeStoreFromReg always byte-swaps and
does not look at BackPatchInfo::FLAG_REVERSE (the MMIO branch does); confirm
byte-reversed stores cannot reach it.
NOTE(review): leading whitespace in this copy is reconstructed (tabs assumed);
apply with whitespace tolerance if the context lines do not match.

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 1a5a8e36a9..ba8c946e14 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -7,6 +7,7 @@
 #include "Common/PerformanceCounter.h"
 
 #include "Core/PatchEngine.h"
+#include "Core/HW/ProcessorInterface.h"
 #include "Core/PowerPC/JitInterface.h"
 #include "Core/PowerPC/Profiler.h"
 #include "Core/PowerPC/JitArm64/Jit.h"
@@ -137,6 +138,17 @@ void JitArm64::Break(UGeckoInstruction inst)
 	exit(0);
 }
 
+void JitArm64::Cleanup()
+{
+	if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
+	{
+		gpr.Lock(W0);
+		MOVI2R(X0, (u64)&GPFifo::FastCheckGatherPipe);
+		BLR(X0);
+		gpr.Unlock(W0);
+	}
+}
+
 void JitArm64::DoDownCount()
 {
 	ARM64Reg WA = gpr.GetReg();
@@ -160,6 +172,7 @@ void JitArm64::DoDownCount()
 // Exits
 void JitArm64::WriteExit(u32 destination)
 {
+	Cleanup();
 	DoDownCount();
 
 	if (Profiler::g_ProfileBlocks)
@@ -188,6 +201,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest)
 	STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
 	STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
 	gpr.Unlock(dest);
+	Cleanup();
 	DoDownCount();
 
 	if (Profiler::g_ProfileBlocks)
@@ -204,6 +218,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest)
 
 void JitArm64::WriteExceptionExit()
 {
+	Cleanup();
 	DoDownCount();
 
 	if (Profiler::g_ProfileBlocks)
@@ -224,10 +239,31 @@ void JitArm64::WriteExceptionExit()
 	gpr.Unlock(WA);
 }
 
+void JitArm64::WriteExternalExceptionExit(ARM64Reg dest)
+{
+	STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
+	STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
+	gpr.Unlock(dest);
+	Cleanup();
+	DoDownCount();
+
+	if (Profiler::g_ProfileBlocks)
+		EndTimeProfile(js.curBlock);
+
+	MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExternalExceptions);
+	BLR(EncodeRegTo64(dest));
+	LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
+	STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
+
+	MOVI2R(EncodeRegTo64(dest), (u64)asm_routines.dispatcher);
+	BR(EncodeRegTo64(dest));
+}
+
 void JitArm64::WriteExitDestInR(ARM64Reg Reg)
 {
 	STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc));
 	gpr.Unlock(Reg);
+	Cleanup();
 	DoDownCount();
 
 	if (Profiler::g_ProfileBlocks)
@@ -450,6 +486,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
 			js.isLastInstruction = true;
 		}
 
+		// Gather pipe writes using a non-immediate address are discovered by profiling.
+		bool gatherPipeIntCheck = js.fifoWriteAddresses.find(ops[i].address) != js.fifoWriteAddresses.end();
+
 		if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
 		{
 			js.fifoBytesThisBlock -= 32;
@@ -458,11 +497,65 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
 			BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
 			regs_in_use[W30] = 0;
 
-			ABI_PushRegisters(regs_in_use);
-			MOVI2R(X30, (u64)&GPFifo::FastCheckGatherPipe);
-			BLR(X30);
-			ABI_PopRegisters(regs_in_use);
+			FixupBranch Exception = B();
+			SwitchToFarCode();
+			const u8* done_here = GetCodePtr();
+			FixupBranch exit = B();
+			SetJumpTarget(Exception);
+			ABI_PushRegisters(regs_in_use);
+			MOVI2R(X30, (u64)&GPFifo::FastCheckGatherPipe);
+			BLR(X30);
+			ABI_PopRegisters(regs_in_use);
+
+			// Inline exception check
+			LDR(INDEX_UNSIGNED, W30, X29, PPCSTATE_OFF(Exceptions));
+			TBZ(W30, 3, done_here); // EXCEPTION_EXTERNAL_INT
+			LDR(INDEX_UNSIGNED, W30, X29, PPCSTATE_OFF(msr));
+			TBZ(W30, 15, done_here); // MSR.EE (0x8000): skip when external interrupts are disabled
+			MOVI2R(X30, (u64)&ProcessorInterface::m_InterruptCause);
+			LDR(INDEX_UNSIGNED, W30, X30, 0);
+			TST(W30, 23, 2); // presumably a logical immediate (immr=23, imms=2) for mask 0xE00 -- TODO confirm
+			B(CC_EQ, done_here);
+
+			gpr.Flush(FLUSH_MAINTAIN_STATE);
+			fpr.Flush(FLUSH_MAINTAIN_STATE);
+			MOVI2R(W30, ops[i].address);
+			WriteExternalExceptionExit(W30);
+			SwitchToNearCode();
+			SetJumpTarget(exit);
 			gpr.Unlock(W30);
+
+			// So we don't check exceptions twice
+			gatherPipeIntCheck = false;
+		}
+		// Gather pipe writes can generate an exception; add an exception check.
+		// TODO: This doesn't really match hardware; the CP interrupt is
+		// asynchronous.
+		if (jo.optimizeGatherPipe && gatherPipeIntCheck)
+		{
+			ARM64Reg WA = gpr.GetReg();
+			ARM64Reg XA = EncodeRegTo64(WA);
+			LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
+			FixupBranch NoExtException = TBZ(WA, 3); // EXCEPTION_EXTERNAL_INT
+			FixupBranch Exception = B();
+			SwitchToFarCode();
+			const u8* done_here = GetCodePtr();
+			FixupBranch exit = B();
+			SetJumpTarget(Exception);
+			LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr));
+			TBZ(WA, 15, done_here); // MSR.EE (0x8000): skip when external interrupts are disabled
+			MOVI2R(XA, (u64)&ProcessorInterface::m_InterruptCause);
+			LDR(INDEX_UNSIGNED, WA, XA, 0);
+			TST(WA, 23, 2); // presumably a logical immediate (immr=23, imms=2) for mask 0xE00 -- TODO confirm
+			B(CC_EQ, done_here);
+
+			gpr.Flush(FLUSH_MAINTAIN_STATE);
+			fpr.Flush(FLUSH_MAINTAIN_STATE);
+			MOVI2R(WA, ops[i].address);
+			WriteExternalExceptionExit(WA);
+			SwitchToNearCode();
+			SetJumpTarget(NoExtException);
+			SetJumpTarget(exit);
 		}
 
 		if (!ops[i].skip)
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index c02b196082..91d4de7782 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -273,6 +273,7 @@ private:
 	const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b);
 
 	void DoDownCount();
+	void Cleanup();
 
 	// Profiling
 	void BeginTimeProfile(JitBlock* b);
@@ -282,6 +283,7 @@ private:
 	void WriteExit(u32 destination);
 	void WriteExceptionExit(Arm64Gen::ARM64Reg dest);
 	void WriteExceptionExit();
+	void WriteExternalExceptionExit(Arm64Gen::ARM64Reg dest);
 	void WriteExitDestInR(Arm64Gen::ARM64Reg dest);
 
 	FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 5708348e21..9b2223ddce 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -278,18 +278,18 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
 
 bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
 {
-	if (!(access_address >= (uintptr_t)Memory::physical_base && access_address < (uintptr_t)Memory::physical_base + 0x100010000) &&
-		!(access_address >= (uintptr_t)Memory::logical_base && access_address < (uintptr_t)Memory::logical_base + 0x100010000))
+	if (!IsInSpace((u8*)ctx->CTX_PC))
 	{
-		ERROR_LOG(DYNA_REC, "Exception handler - access below memory space. PC: 0x%016llx 0x%016lx < 0x%016lx", ctx->CTX_PC, access_address, (uintptr_t)Memory::physical_base);
+		ERROR_LOG(DYNA_REC, "Backpatch location not within codespace 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC));
 
 		DoBacktrace(access_address, ctx);
 		return false;
 	}
 
-	if (!IsInSpace((u8*)ctx->CTX_PC))
+	if (!(access_address >= (uintptr_t)Memory::physical_base && access_address < (uintptr_t)Memory::physical_base + 0x100010000) &&
+		!(access_address >= (uintptr_t)Memory::logical_base && access_address < (uintptr_t)Memory::logical_base + 0x100010000))
 	{
-		ERROR_LOG(DYNA_REC, "Backpatch location not within codespace 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC));
+		ERROR_LOG(DYNA_REC, "Exception handler - access below memory space. PC: 0x%016llx 0x%016lx < 0x%016lx", ctx->CTX_PC, access_address, (uintptr_t)Memory::physical_base);
 
 		DoBacktrace(access_address, ctx);
 		return false;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 52cf6ce94f..a9fb0609c5 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -285,10 +285,64 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
 	if (is_immediate)
 		mmio_address = PowerPC::IsOptimizableMMIOAccess(imm_addr, access_size);
 
-	if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
+	if (is_immediate && jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))
+	{
+		ARM64Reg WA = INVALID_REG;
+		int accessSize;
+		if (flags & BackPatchInfo::FLAG_SIZE_32)
+			accessSize = 32;
+		else if (flags & BackPatchInfo::FLAG_SIZE_16)
+			accessSize = 16;
+		else
+			accessSize = 8;
+
+		if (accessSize != 8)
+			WA = gpr.GetReg();
+
+		u64 base_ptr = std::min((u64)&GPFifo::m_gatherPipeCount, (u64)&GPFifo::m_gatherPipe);
+		u32 count_off = (u64)&GPFifo::m_gatherPipeCount - base_ptr;
+		u32 pipe_off = (u64)&GPFifo::m_gatherPipe - base_ptr;
+
+		MOVI2R(X30, base_ptr);
+
+		if (pipe_off)
+			ADD(X1, X30, pipe_off);
+
+		LDR(INDEX_UNSIGNED, W0, X30, count_off);
+		if (accessSize == 32)
+		{
+			REV32(WA, RS);
+			if (pipe_off)
+				STR(WA, X1, ArithOption(X0));
+			else
+				STR(WA, X30, ArithOption(X0));
+		}
+		else if (accessSize == 16)
+		{
+			REV16(WA, RS);
+			if (pipe_off)
+				STRH(WA, X1, ArithOption(X0));
+			else
+				STRH(WA, X30, ArithOption(X0));
+		}
+		else
+		{
+			if (pipe_off)
+				STRB(RS, X1, ArithOption(X0));
+			else
+				STRB(RS, X30, ArithOption(X0));
+
+		}
+		ADD(W0, W0, accessSize >> 3);
+		STR(INDEX_UNSIGNED, W0, X30, count_off);
+		js.fifoBytesThisBlock += accessSize >> 3;
+
+		if (accessSize != 8)
+			gpr.Unlock(WA);
+	}
+	else if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
 	{
 		MOVI2R(XA, imm_addr);
-
 		EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0));
 	}
 	else if (mmio_address && !(flags & BackPatchInfo::FLAG_REVERSE))
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
index 08f3550050..0c2e6f29fd 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -422,7 +422,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 		}
 		ADD(W0, W0, accessSize >> 3);
 		STR(INDEX_UNSIGNED, W0, X30, count_off);
-		jit->js.fifoBytesThisBlock += accessSize >> 3;
+		js.fifoBytesThisBlock += accessSize >> 3;
 
 		if (update)
 		{