diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index 2c2ede99ea..d7c53ba289 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -1389,6 +1389,10 @@ void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) { EncodeData3SrcInst(5, Rd, Rn, Rm, Ra); } +void ARM64XEmitter::UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + UMADDL(Rd, Rn, Rm, SP); +} void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) { EncodeData3SrcInst(6, Rd, Rn, Rm, Ra); diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index 9041cd32bb..098b48fd25 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -517,6 +517,7 @@ public: void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 8f2f7e8568..ee7e44b39c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -86,9 +86,9 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) // also flush the program counter ARM64Reg WA = gpr.GetReg(); MOVI2R(WA, js.compilerPC); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(pc)); ADD(WA, WA, 4); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc)); gpr.Unlock(WA); } @@ -102,14 +102,14 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) if (js.isLastInstruction) { ARM64Reg WA = gpr.GetReg(); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc)); WriteExceptionExit(WA); } else { // only exit if ppcstate.npc was changed ARM64Reg WA = gpr.GetReg(); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc)); ARM64Reg WB = gpr.GetReg(); MOVI2R(WB, js.compilerPC + 4); CMP(WB, WA); @@ -132,8 +132,8 @@ void JitArm64::HLEFunction(UGeckoInstruction inst) BLR(X30); ARM64Reg WA = gpr.GetReg(); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); - WriteExitDestInR(WA); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc)); + WriteExit(WA); } void JitArm64::DoNothing(UGeckoInstruction inst) @@ -161,18 +161,18 @@ void JitArm64::Cleanup() void JitArm64::DoDownCount() { ARM64Reg WA = gpr.GetReg(); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount)); if (js.downcountAmount < 4096) // We can enlarge this if we used rotations { SUBS(WA, WA, js.downcountAmount); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount)); } else { ARM64Reg WB = gpr.GetReg(); MOVI2R(WB, js.downcountAmount); SUBS(WA, WA, WB); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount)); gpr.Unlock(WB); } gpr.Unlock(WA); @@ -193,93 +193,87 @@ void JitArm64::WriteExit(u32 destination) linkData.exitAddress = destination; linkData.exitPtrs = GetWritableCodePtr(); linkData.linkStatus = false; + 
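The UMULL helper added in Arm64Emitter.cpp above is UMADDL with register 31 in the accumulator slot; in that operand position register 31 is the zero register, so the instruction reduces to a plain 32x32 -> 64 bit unsigned widening multiply. A minimal host-side model of that semantics, illustrative only and not part of the patch:

#include <cassert>
#include <cstdint>

// Xd = (u64)Wn * (u64)Wm, which is what UMADDL with a zero addend computes.
static uint64_t umull_model(uint32_t wn, uint32_t wm)
{
  return static_cast<uint64_t>(wn) * static_cast<uint64_t>(wm);
}

int main()
{
  assert(umull_model(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFE00000001ull);
  assert(umull_model(0x80000000u, 2u) == 0x100000000ull);
  return 0;
}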
b->linkData.push_back(linkData); // the code generated in JitArm64BlockCache::WriteDestroyBlock must fit in this block - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - MOVI2R(WA, destination); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); - MOVI2R(XA, (u64)asm_routines.dispatcher); - BR(XA); - gpr.Unlock(WA); - - b->linkData.push_back(linkData); -} -void JitArm64::WriteExceptionExit(ARM64Reg dest) -{ - STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); - STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); - gpr.Unlock(dest); - Cleanup(); - DoDownCount(); - - if (Profiler::g_ProfileBlocks) - EndTimeProfile(js.curBlock); - - MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExceptions); - BLR(EncodeRegTo64(dest)); - LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); - STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); - - MOVI2R(EncodeRegTo64(dest), (u64)asm_routines.dispatcher); - BR(EncodeRegTo64(dest)); + MOVI2R(X30, (u64)asm_routines.dispatcher); + MOVI2R(DISPATCHER_PC, destination); + BR(X30); } -void JitArm64::WriteExceptionExit() +void JitArm64::WriteExit(ARM64Reg Reg) { Cleanup(); DoDownCount(); - if (Profiler::g_ProfileBlocks) - EndTimeProfile(js.curBlock); - - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); - MOVI2R(XA, (u64)&PowerPC::CheckExceptions); - BLR(XA); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); - - MOVI2R(XA, (u64)asm_routines.dispatcher); - BR(XA); - - gpr.Unlock(WA); -} - -void JitArm64::WriteExternalExceptionExit(ARM64Reg dest) -{ - STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); - STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); - gpr.Unlock(dest); - Cleanup(); - DoDownCount(); - - if (Profiler::g_ProfileBlocks) - EndTimeProfile(js.curBlock); - - MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExternalExceptions); - BLR(EncodeRegTo64(dest)); - LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); - STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); - - MOVI2R(EncodeRegTo64(dest), (u64)asm_routines.dispatcher); - BR(EncodeRegTo64(dest)); -} - -void JitArm64::WriteExitDestInR(ARM64Reg Reg) -{ - STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc)); + if (Reg != DISPATCHER_PC) + MOV(DISPATCHER_PC, Reg); gpr.Unlock(Reg); - Cleanup(); - DoDownCount(); if (Profiler::g_ProfileBlocks) EndTimeProfile(js.curBlock); - MOVI2R(EncodeRegTo64(Reg), (u64)asm_routines.dispatcher); - BR(EncodeRegTo64(Reg)); + MOVI2R(X30, (u64)asm_routines.dispatcher); + BR(X30); +} + +void JitArm64::WriteExceptionExit(u32 destination, bool only_external) +{ + Cleanup(); + DoDownCount(); + + LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(Exceptions)); + MOVI2R(DISPATCHER_PC, destination); + FixupBranch no_exceptions = CBZ(W30); + + STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); + STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc)); + if (only_external) + MOVI2R(X30, (u64)&PowerPC::CheckExternalExceptions); + else + MOVI2R(X30, (u64)&PowerPC::CheckExceptions); + BLR(X30); + LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc)); + + SetJumpTarget(no_exceptions); + + if (Profiler::g_ProfileBlocks) + EndTimeProfile(js.curBlock); + + MOVI2R(X30, (u64)asm_routines.dispatcher); + BR(X30); +} + +void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external) +{ + Cleanup(); + DoDownCount(); + + ARM64Reg WA = gpr.GetReg(); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); + 
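Both new WriteExceptionExit overloads follow the same shape: the exit destination is placed in DISPATCHER_PC up front, ppcState.Exceptions is tested, and the call into CheckExceptions / CheckExternalExceptions plus the pc/npc round trip only happens when an exception bit is actually pending. A rough host-side sketch of that control flow, using stand-in names (PpcStateModel, check_exceptions) rather than the real Dolphin types:

#include <cstdint>

struct PpcStateModel
{
  uint32_t pc = 0;
  uint32_t npc = 0;
  uint32_t exceptions = 0;  // stands in for ppcState.Exceptions
};

// Placeholder for PowerPC::CheckExceptions()/CheckExternalExceptions(): the real
// helpers may redirect npc to an exception vector and clear exception bits.
static void check_exceptions(PpcStateModel& s)
{
  s.exceptions = 0;
}

static uint32_t exception_exit(PpcStateModel& s, uint32_t destination)
{
  uint32_t dispatcher_pc = destination;  // MOVI2R(DISPATCHER_PC, destination)
  if (s.exceptions != 0)                 // CBZ(W30) skips this slow path
  {
    s.pc = dispatcher_pc;                // STR to ppcState.pc
    s.npc = dispatcher_pc;               // STR to ppcState.npc
    check_exceptions(s);                 // BLR to the interpreter helper
    dispatcher_pc = s.npc;               // LDR npc back into DISPATCHER_PC
  }
  return dispatcher_pc;                  // BR to the dispatcher with the PC in W26
}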
FixupBranch no_exceptions = CBZ(WA); + gpr.Unlock(WA); + + STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(pc)); + STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc)); + if (only_external) + MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExternalExceptions); + else + MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExceptions); + BLR(EncodeRegTo64(dest)); + LDR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc)); + + SetJumpTarget(no_exceptions); + + if (dest != DISPATCHER_PC) + MOV(DISPATCHER_PC, dest); + gpr.Unlock(dest); + + if (Profiler::g_ProfileBlocks) + EndTimeProfile(js.curBlock); + + MOVI2R(X30, (u64)asm_routines.dispatcher); + BR(X30); } void JitArm64::DumpCode(const u8* start, const u8* end) @@ -420,8 +414,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB FixupBranch bail = B(CC_PL); ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - MOVI2R(WA, js.blockStart); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + MOVI2R(DISPATCHER_PC, js.blockStart); MOVI2R(XA, (u64)asm_routines.doTiming); BR(XA); gpr.Unlock(WA); @@ -449,13 +442,13 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB int gqr = *code_block.m_gqr_used.begin(); if (!code_block.m_gqr_modified[gqr] && !GQR(gqr)) { - LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(spr[SPR_GQR0]) + gqr * 4); + LDR(INDEX_UNSIGNED, W0, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0]) + gqr * 4); FixupBranch no_fail = CBZ(W0); FixupBranch fail = B(); SwitchToFarCode(); SetJumpTarget(fail); - MOVI2R(W0, js.blockStart); - STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc)); + MOVI2R(DISPATCHER_PC, js.blockStart); + STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); MOVI2R(W0, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE); MOVI2R(X1, (u64)&JitInterface::CompileExceptionCheck); BLR(X1); @@ -473,10 +466,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB gpr.Start(js.gpa); fpr.Start(js.fpa); - // Setup memory base register - u8* base = UReg_MSR(MSR).DR ? 
Memory::logical_base : Memory::physical_base; - MOVI2R(X28, (u64)base); - if (!SConfig::GetInstance().bEnableDebugging) js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); @@ -517,9 +506,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB ABI_PopRegisters(regs_in_use); // Inline exception check - LDR(INDEX_UNSIGNED, W30, X29, PPCSTATE_OFF(Exceptions)); + LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(Exceptions)); TBZ(W30, 3, done_here); // EXCEPTION_EXTERNAL_INT - LDR(INDEX_UNSIGNED, W30, X29, PPCSTATE_OFF(msr)); + LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(msr)); TBZ(W30, 11, done_here); MOVI2R(X30, (u64)&ProcessorInterface::m_InterruptCause); LDR(INDEX_UNSIGNED, W30, X30, 0); @@ -528,8 +517,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - MOVI2R(W30, ops[i].address); - WriteExternalExceptionExit(W30); + WriteExceptionExit(js.compilerPC, true); SwitchToNearCode(); SetJumpTarget(exit); gpr.Unlock(W30); @@ -544,24 +532,24 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB { ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); FixupBranch NoExtException = TBZ(WA, 3); // EXCEPTION_EXTERNAL_INT FixupBranch Exception = B(); SwitchToFarCode(); const u8* done_here = GetCodePtr(); FixupBranch exit = B(); SetJumpTarget(Exception); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr)); TBZ(WA, 11, done_here); MOVI2R(XA, (u64)&ProcessorInterface::m_InterruptCause); LDR(INDEX_UNSIGNED, WA, XA, 0); TST(WA, 23, 2); B(CC_EQ, done_here); + gpr.Unlock(WA); gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - MOVI2R(WA, ops[i].address); - WriteExternalExceptionExit(WA); + WriteExceptionExit(js.compilerPC, true); SwitchToNearCode(); SetJumpTarget(NoExtException); SetJumpTarget(exit); @@ -573,7 +561,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB { //This instruction uses FPU - needs to add FP exception bailout ARM64Reg WA = gpr.GetReg(); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr)); FixupBranch b1 = TBNZ(WA, 13); // Test FP enabled bit FixupBranch far = B(); @@ -583,12 +571,13 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); ORR(WA, WA, 26, 0); // EXCEPTION_FPU_UNAVAILABLE - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); - MOVI2R(WA, js.compilerPC); - WriteExceptionExit(WA); + gpr.Unlock(WA); + + WriteExceptionExit(js.compilerPC); SwitchToNearCode(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 252a9e6004..cb6162ba87 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -17,14 +17,6 @@ #include "Core/PowerPC/JitArmCommon/BackPatch.h" #include "Core/PowerPC/JitCommon/JitBase.h" -#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem)) - -// Some asserts to make sure we will be able to load everything 
-static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); -static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned"); -static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!"); -static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); - class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock { public: @@ -95,6 +87,8 @@ public: void rlwnmx(UGeckoInstruction inst); void srawix(UGeckoInstruction inst); void mullwx(UGeckoInstruction inst); + void mulhwx(UGeckoInstruction inst); + void mulhwux(UGeckoInstruction inst); void addic(UGeckoInstruction inst); void mulli(UGeckoInstruction inst); void addzex(UGeckoInstruction inst); @@ -206,6 +200,7 @@ private: { nearcode = GetWritableCodePtr(); SetCodePtrUnsafe(farcode.GetWritableCodePtr()); + AlignCode16(); } void SwitchToNearCode() @@ -237,10 +232,9 @@ private: // Exits void WriteExit(u32 destination); - void WriteExceptionExit(Arm64Gen::ARM64Reg dest); - void WriteExceptionExit(); - void WriteExternalExceptionExit(ARM64Reg dest); - void WriteExitDestInR(Arm64Gen::ARM64Reg dest); + void WriteExit(Arm64Gen::ARM64Reg dest); + void WriteExceptionExit(u32 destination, bool only_external = false); + void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false); FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp index 44833004c5..cc416fc97e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp @@ -29,9 +29,8 @@ void JitArm64BlockCache::WriteDestroyBlock(const u8* location, u32 address) { // must fit within the code generated in JitArm64::WriteExit ARM64XEmitter emit((u8 *)location); - emit.MOVI2R(W0, address); emit.MOVI2R(X30, (u64)jit->GetAsmRoutines()->dispatcher); - emit.STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc)); + emit.MOVI2R(DISPATCHER_PC, address); emit.BR(X30); emit.FlushIcache(); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 5f4421ee93..dbe4ff43ee 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -60,28 +60,28 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, { m_float_emit.FCVT(32, 64, D0, RS); m_float_emit.REV32(8, D0, D0); - m_float_emit.STR(32, D0, X28, addr); + m_float_emit.STR(32, D0, MEM_REG, addr); } else if (flags & BackPatchInfo::FLAG_SIZE_F32I) { m_float_emit.REV32(8, D0, RS); - m_float_emit.STR(32, D0, X28, addr); + m_float_emit.STR(32, D0, MEM_REG, addr); } else if (flags & BackPatchInfo::FLAG_SIZE_F32X2) { m_float_emit.FCVTN(32, D0, RS); m_float_emit.REV32(8, D0, D0); - m_float_emit.STR(64, Q0, X28, addr); + m_float_emit.STR(64, Q0, MEM_REG, addr); } else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I) { m_float_emit.REV32(8, D0, RS); - m_float_emit.STR(64, Q0, X28, addr); + m_float_emit.STR(64, Q0, MEM_REG, addr); } else { m_float_emit.REV64(8, Q0, RS); - m_float_emit.STR(64, Q0, X28, addr); + m_float_emit.STR(64, Q0, MEM_REG, addr); } } else if (flags & BackPatchInfo::FLAG_LOAD && @@ -89,12 +89,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, { if (flags & BackPatchInfo::FLAG_SIZE_F32) { - m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr); + 
m_float_emit.LDR(32, EncodeRegToDouble(RS), MEM_REG, addr); m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); } else { - m_float_emit.LDR(64, EncodeRegToDouble(RS), X28, addr); + m_float_emit.LDR(64, EncodeRegToDouble(RS), MEM_REG, addr); m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); } } @@ -107,27 +107,27 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, REV16(temp, RS); if (flags & BackPatchInfo::FLAG_SIZE_32) - STR(temp, X28, addr); + STR(temp, MEM_REG, addr); else if (flags & BackPatchInfo::FLAG_SIZE_16) - STRH(temp, X28, addr); + STRH(temp, MEM_REG, addr); else - STRB(RS, X28, addr); + STRB(RS, MEM_REG, addr); } else if (flags & BackPatchInfo::FLAG_ZERO_256) { // This literally only stores 32bytes of zeros to the target address - ADD(addr, addr, X28); + ADD(addr, addr, MEM_REG); STP(INDEX_SIGNED, ZR, ZR, addr, 0); STP(INDEX_SIGNED, ZR, ZR, addr, 16); } else { if (flags & BackPatchInfo::FLAG_SIZE_32) - LDR(RS, X28, addr); + LDR(RS, MEM_REG, addr); else if (flags & BackPatchInfo::FLAG_SIZE_16) - LDRH(RS, X28, addr); + LDRH(RS, MEM_REG, addr); else if (flags & BackPatchInfo::FLAG_SIZE_8) - LDRB(RS, X28, addr); + LDRB(RS, MEM_REG, addr); if (!(flags & BackPatchInfo::FLAG_REVERSE)) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp index 66519bc3c6..bda767d056 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -26,15 +26,13 @@ void JitArm64::sc(UGeckoInstruction inst) ARM64Reg WA = gpr.GetReg(); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); ORR(WA, WA, 31, 0); // Same as WA | EXCEPTION_SYSCALL - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); - MOVI2R(WA, js.compilerPC + 4); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + gpr.Unlock(WA); - // WA is unlocked in this function - WriteExceptionExit(WA); + WriteExceptionExit(js.compilerPC + 4); } void JitArm64::rfi(UGeckoInstruction inst) @@ -60,18 +58,18 @@ void JitArm64::rfi(UGeckoInstruction inst) MOVI2R(WA, (~mask) & clearMSR13); MOVI2R(WB, mask & clearMSR13); - LDR(INDEX_UNSIGNED, WC, X29, PPCSTATE_OFF(msr)); + LDR(INDEX_UNSIGNED, WC, PPC_REG, PPCSTATE_OFF(msr)); AND(WC, WC, WB, ArithOption(WC, ST_LSL, 0)); // rD = Masked MSR - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here AND(WA, WA, WB, ArithOption(WA, ST_LSL, 0)); // rB contains masked SRR1 here ORR(WA, WA, WC, ArithOption(WA, ST_LSL, 0)); // rB = Masked MSR OR masked SRR1 - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr)); // STR rB in to rA + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_SRR0])); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_SRR0])); gpr.Unlock(WB, WC); // WA is unlocked in this function @@ -94,10 +92,9 @@ void JitArm64::bx(UGeckoInstruction inst) if (inst.LK) { - u32 Jumpto = js.compilerPC + 4; ARM64Reg WA = gpr.GetReg(); - MOVI2R(WA, Jumpto); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + MOVI2R(WA, js.compilerPC + 4); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR])); gpr.Unlock(WA); } @@ -109,8 +106,9 @@ void JitArm64::bx(UGeckoInstruction inst) MOVI2R(XA, 
(u64)&CoreTiming::Idle); BLR(XA); - MOVI2R(WA, js.compilerPC); - WriteExceptionExit(WA); + gpr.Unlock(WA); + + WriteExceptionExit(js.compilerPC); } WriteExit(destination); @@ -125,9 +123,9 @@ void JitArm64::bcx(UGeckoInstruction inst) FixupBranch pCTRDontBranch; if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR { - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR])); SUBS(WA, WA, 1); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR])); if (inst.BO & BO_BRANCH_IF_CTR_0) pCTRDontBranch = B(CC_NEQ); @@ -149,9 +147,8 @@ void JitArm64::bcx(UGeckoInstruction inst) if (inst.LK) { - u32 Jumpto = js.compilerPC + 4; - MOVI2R(WA, Jumpto); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + MOVI2R(WA, js.compilerPC + 4); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR])); } gpr.Unlock(WA); @@ -203,17 +200,16 @@ void JitArm64::bcctrx(UGeckoInstruction inst) if (inst.LK_3) { ARM64Reg WB = gpr.GetReg(); - u32 Jumpto = js.compilerPC + 4; - MOVI2R(WB, Jumpto); - STR(INDEX_UNSIGNED, WB, X29, PPCSTATE_OFF(spr[SPR_LR])); + MOVI2R(WB, js.compilerPC + 4); + STR(INDEX_UNSIGNED, WB, PPC_REG, PPCSTATE_OFF(spr[SPR_LR])); gpr.Unlock(WB); } ARM64Reg WA = gpr.GetReg(); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR])); AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. - WriteExitDestInR(WA); + WriteExit(WA); } void JitArm64::bclrx(UGeckoInstruction inst) @@ -225,9 +221,9 @@ void JitArm64::bclrx(UGeckoInstruction inst) FixupBranch pCTRDontBranch; if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR { - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR])); SUBS(WA, WA, 1); - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR])); if (inst.BO & BO_BRANCH_IF_CTR_0) pCTRDontBranch = B(CC_NEQ); @@ -246,22 +242,21 @@ void JitArm64::bclrx(UGeckoInstruction inst) SwitchToFarCode(); SetJumpTarget(far); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR])); AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. 
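For the indirect branches (bcctrx above and this bclrx path), the target comes from CTR or LR with the low two bits cleared, and when LK is set the sequentially next address js.compilerPC + 4 is written to LR after the old value has already been read. A small model of what the emitted sequence computes, with illustrative names:

#include <cstdint>

struct BclrResult
{
  uint32_t target;   // where the guest branch goes
  uint32_t new_lr;   // value left in SPR_LR afterwards
};

static BclrResult bclr_model(uint32_t lr, uint32_t compiler_pc, bool lk)
{
  BclrResult r;
  r.target = lr & ~3u;                   // AND(WA, WA, 30, 29): wipe the bottom 2 bits
  r.new_lr = lk ? compiler_pc + 4 : lr;  // MOVI2R(WB, js.compilerPC + 4); STR to SPR_LR
  return r;
}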
if (inst.LK) { ARM64Reg WB = gpr.GetReg(); - u32 Jumpto = js.compilerPC + 4; - MOVI2R(WB, Jumpto); - STR(INDEX_UNSIGNED, WB, X29, PPCSTATE_OFF(spr[SPR_LR])); + MOVI2R(WB, js.compilerPC + 4); + STR(INDEX_UNSIGNED, WB, PPC_REG, PPCSTATE_OFF(spr[SPR_LR])); gpr.Unlock(WB); } gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); - WriteExitDestInR(WA); + WriteExit(WA); SwitchToNearCode(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index e112b069be..5335f474ce 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -264,7 +264,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) } SetJumpTarget(continue1); - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); gpr.Unlock(WA); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 48a9945500..2e6e0e6f26 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -26,12 +26,12 @@ void JitArm64::ComputeRC(ARM64Reg reg, int crf, bool needs_sext) SXTW(XA, reg); - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[crf])); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); gpr.Unlock(WA); } else { - STR(INDEX_UNSIGNED, EncodeRegTo64(reg), X29, PPCSTATE_OFF(cr_val[crf])); + STR(INDEX_UNSIGNED, EncodeRegTo64(reg), PPC_REG, PPCSTATE_OFF(cr_val[crf])); } } @@ -44,7 +44,7 @@ void JitArm64::ComputeRC(u64 imm, int crf, bool needs_sext) if (imm & 0x80000000 && needs_sext) SXTW(XA, WA); - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[crf])); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); gpr.Unlock(WA); } @@ -57,12 +57,12 @@ void JitArm64::ComputeCarry(bool Carry) { ARM64Reg WA = gpr.GetReg(); MOVI2R(WA, 1); - STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); gpr.Unlock(WA); return; } - STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca)); + STRB(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(xer_ca)); } void JitArm64::ComputeCarry() @@ -72,7 +72,7 @@ void JitArm64::ComputeCarry() ARM64Reg WA = gpr.GetReg(); CSINC(WA, WSP, WSP, CC_CC); - STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); gpr.Unlock(WA); } @@ -447,7 +447,7 @@ void JitArm64::cmp(UGeckoInstruction inst) SXTW(XB, RB); SUB(XA, XA, XB); - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); gpr.Unlock(WA, WB); } @@ -474,7 +474,7 @@ void JitArm64::cmpl(UGeckoInstruction inst) ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); SUB(XA, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b))); - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); gpr.Unlock(WA); } @@ -540,7 +540,7 @@ void JitArm64::cmpli(UGeckoInstruction inst) SUB(XA, EncodeRegTo64(gpr.R(a)), XA); } - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + 
(sizeof(PowerPC::ppcState.cr_val[0]) * crf)); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); gpr.Unlock(WA); } @@ -641,7 +641,7 @@ void JitArm64::srawix(UGeckoInstruction inst) ANDS(WSP, WA, RA, ArithOption(RA, ST_LSL, 0)); CSINC(WA, WSP, WSP, CC_EQ); - STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); gpr.Unlock(WA); } else @@ -650,7 +650,7 @@ void JitArm64::srawix(UGeckoInstruction inst) ARM64Reg RA = gpr.R(a); ARM64Reg RS = gpr.R(s); MOV(RA, RS); - STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca)); + STRB(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(xer_ca)); } } @@ -704,7 +704,6 @@ void JitArm64::mulli(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITIntegerOff); - FALLBACK_IF(inst.OE); int a = inst.RA, d = inst.RD; @@ -747,6 +746,56 @@ void JitArm64::mullwx(UGeckoInstruction inst) } } +void JitArm64::mulhwx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + + int a = inst.RA, b = inst.RB, d = inst.RD; + + if (gpr.IsImm(a) && gpr.IsImm(b)) + { + s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); + gpr.SetImmediate(d, (u32)((u64)(((s64)i * (s64)j) ) >> 32)); + if (inst.Rc) + ComputeRC(gpr.GetImm(d), 0); + } + else + { + gpr.BindToRegister(d, d == a || d == b); + SMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); + LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); + + if (inst.Rc) + ComputeRC(gpr.R(d), 0); + } +} + +void JitArm64::mulhwux(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITIntegerOff); + + int a = inst.RA, b = inst.RB, d = inst.RD; + + if (gpr.IsImm(a) && gpr.IsImm(b)) + { + u32 i = gpr.GetImm(a), j = gpr.GetImm(b); + gpr.SetImmediate(d, (u32)( ( (u64)i * (u64)j) >> 32) ); + if (inst.Rc) + ComputeRC(gpr.GetImm(d), 0); + } + else + { + gpr.BindToRegister(d, d == a || d == b); + UMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); + LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); + + if (inst.Rc) + ComputeRC(gpr.R(d), 0); + } +} + void JitArm64::addzex(UGeckoInstruction inst) { INSTRUCTION_START @@ -759,14 +808,14 @@ void JitArm64::addzex(UGeckoInstruction inst) { gpr.BindToRegister(d, true); ARM64Reg WA = gpr.GetReg(); - LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); ADDS(gpr.R(d), gpr.R(a), WA); gpr.Unlock(WA); } else { gpr.BindToRegister(d, false); - LDRB(INDEX_UNSIGNED, gpr.R(d), X29, PPCSTATE_OFF(xer_ca)); + LDRB(INDEX_UNSIGNED, gpr.R(d), PPC_REG, PPCSTATE_OFF(xer_ca)); ADDS(gpr.R(d), gpr.R(a), gpr.R(d)); } @@ -814,7 +863,7 @@ void JitArm64::subfex(UGeckoInstruction inst) gpr.BindToRegister(d, false); MOVI2R(gpr.R(d), ~i + j); ARM64Reg WA = gpr.GetReg(); - LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); ADD(gpr.R(d), gpr.R(d), WA); gpr.Unlock(WA); @@ -840,7 +889,7 @@ void JitArm64::subfex(UGeckoInstruction inst) gpr.BindToRegister(d, d == a || d == b); // upload the carry state - LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); CMP(WA, 1); // d = ~a + b + carry; @@ -935,7 +984,7 @@ void JitArm64::addex(UGeckoInstruction inst) gpr.BindToRegister(d, false); MOVI2R(gpr.R(d), i + j); ARM64Reg WA = gpr.GetReg(); - LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); ADD(gpr.R(d), gpr.R(d), WA); gpr.Unlock(WA); @@ -961,7 +1010,7 @@ void 
JitArm64::addex(UGeckoInstruction inst) // upload the carry state ARM64Reg WA = gpr.GetReg(); - LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); CMP(WA, 1); gpr.Unlock(WA); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index b9f130394a..d122170b85 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -463,19 +463,15 @@ void JitArm64::lXX(UGeckoInstruction inst) ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - MOVI2R(XA, (u64)&CoreTiming::Idle); BLR(XA); - gpr.Unlock(WA); - WriteExceptionExit(); + + WriteExceptionExit(js.compilerPC); SwitchToNearCode(); SetJumpTarget(noIdle); - - //js.compilerPC += 8; - return; } } @@ -597,7 +593,7 @@ void JitArm64::lmw(UGeckoInstruction inst) MOVI2R(WA, (u32)(s32)(s16)inst.SIMM_16); } - ADD(XA, XA, X28); + ADD(XA, XA, MEM_REG); for (int i = inst.RD; i < 32; i++) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index dde52cfad5..f281d29d27 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -45,9 +45,9 @@ void JitArm64::psq_l(UGeckoInstruction inst) if (inst.RA || update) // Always uses the register on update { if (offset >= 0) - ADD(addr_reg, gpr.R(inst.RA), offset); + ADD(addr_reg, arm_addr, offset); else - SUB(addr_reg, gpr.R(inst.RA), std::abs(offset)); + SUB(addr_reg, arm_addr, std::abs(offset)); } else { @@ -65,18 +65,18 @@ void JitArm64::psq_l(UGeckoInstruction inst) VS = fpr.RW(inst.RS, REG_REG_SINGLE); if (!inst.W) { - ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), X28); + ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), MEM_REG); m_float_emit.LD1(32, 1, EncodeRegToDouble(VS), EncodeRegTo64(addr_reg)); } else { - m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), X28); + m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), MEM_REG); } m_float_emit.REV32(8, EncodeRegToDouble(VS), EncodeRegToDouble(VS)); } else { - LDR(INDEX_UNSIGNED, scale_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); + LDR(INDEX_UNSIGNED, scale_reg, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); UBFM(type_reg, scale_reg, 16, 18); // Type UBFM(scale_reg, scale_reg, 24, 29); // Scale @@ -179,7 +179,7 @@ void JitArm64::psq_st(UGeckoInstruction inst) m_float_emit.FCVTN(32, D0, VS); } - LDR(INDEX_UNSIGNED, scale_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); + LDR(INDEX_UNSIGNED, scale_reg, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); UBFM(type_reg, scale_reg, 0, 2); // Type UBFM(scale_reg, scale_reg, 8, 13); // Scale diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 77eed0a70b..09cb1efc5d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -101,7 +101,7 @@ void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state) { ARM64Reg host_reg = reg.GetReg(); if (reg.IsDirty()) - m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); + m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg])); if (!maintain_state) { @@ -113,14 +113,14 @@ void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state) { if (!reg.GetImm()) { - m_emit->STR(INDEX_UNSIGNED, WSP, 
X29, PPCSTATE_OFF(gpr[preg])); + m_emit->STR(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(gpr[preg])); } else { ARM64Reg host_reg = GetReg(); m_emit->MOVI2R(host_reg, reg.GetImm()); - m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); + m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg])); UnlockRegister(host_reg); } @@ -147,7 +147,7 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state) ARM64Reg RX1 = R(i); ARM64Reg RX2 = R(i + 1); - m_emit->STP(INDEX_SIGNED, RX1, RX2, X29, PPCSTATE_OFF(gpr[0]) + i * sizeof(u32)); + m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, PPCSTATE_OFF(gpr[0]) + i * sizeof(u32)); if (!maintain_state) { UnlockRegister(RX1); @@ -210,7 +210,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg) ARM64Reg host_reg = GetReg(); reg.Load(host_reg); reg.SetDirty(false); - m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); + m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg])); return host_reg; } break; @@ -242,7 +242,7 @@ void Arm64GPRCache::BindToRegister(u32 preg, bool do_load) ARM64Reg host_reg = GetReg(); reg.Load(host_reg); if (do_load) - m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); + m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg])); } } @@ -345,7 +345,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) { // Load the high 64bits from the file and insert them in to the high 64bits of the host register ARM64Reg tmp_reg = GetReg(); - m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, X29, PPCSTATE_OFF(ps[preg][1])); + m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); m_float_emit->INS(64, host_reg, 1, tmp_reg, 0); UnlockRegister(tmp_reg); @@ -399,7 +399,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type) reg.Load(host_reg, REG_LOWER_PAIR); } reg.SetDirty(false); - m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); return host_reg; } default: @@ -446,7 +446,7 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type) case REG_REG: // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store. // It would take longer to do an insert to a temporary and a 64bit store than to just do this. - m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); break; case REG_DUP_SINGLE: flush_reg = GetReg(); @@ -454,7 +454,7 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type) // fall through case REG_DUP: // Store PSR1 (which is equal to PSR0) in memory. - m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][1])); + m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); break; default: // All other types doesn't store anything in PSR1. @@ -543,7 +543,7 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) store_size = 64; if (dirty) - m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); if (!maintain_state) { @@ -557,9 +557,9 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) { // If the paired registers were at the start of ppcState we could do an STP here. 
// Too bad moving them would break savestate compatibility between x86_64 and AArch64 - //m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); - m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); + //m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0])); + m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1])); } if (!maintain_state) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 6616e4b6fb..2af6281a49 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -9,12 +9,24 @@ #include "Common/Arm64Emitter.h" #include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/PPCAnalyst.h" -// Dedicated host registers -// X29 = ppcState pointer using namespace Arm64Gen; +// Dedicated host registers +static const ARM64Reg MEM_REG = X28; // memory base register +static const ARM64Reg PPC_REG = X29; // ppcState pointer +static const ARM64Reg DISPATCHER_PC = W26; // register for PC when calling the dispatcher + +#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem)) + +// Some asserts to make sure we will be able to load everything +static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); +static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned"); +static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!"); +static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); + enum RegType { REG_NOTLOADED = 0, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index efeddfd89f..d275bdab3b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -23,20 +23,20 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) switch (bit) { case CR_SO_BIT: // check bit 61 set - LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); branch = jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61); break; case CR_EQ_BIT: // check bits 31-0 == 0 - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(cr_val[field])); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(cr_val[field])); branch = jump_if_set ? CBZ(WA) : CBNZ(WA); break; case CR_GT_BIT: // check val > 0 - LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); CMP(XA, SP); branch = B(jump_if_set ? CC_GT : CC_LE); break; case CR_LT_BIT: // check bit 62 set - LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); branch = jump_if_set ? 
TBNZ(XA, 62) : TBZ(XA, 62); break; default: @@ -53,7 +53,7 @@ void JitArm64::mtmsr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.BindToRegister(inst.RS, true); - STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(msr)); + STR(INDEX_UNSIGNED, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr)); gpr.Flush(FlushMode::FLUSH_ALL); fpr.Flush(FlushMode::FLUSH_ALL); @@ -67,7 +67,7 @@ void JitArm64::mfmsr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.BindToRegister(inst.RD, false); - LDR(INDEX_UNSIGNED, gpr.R(inst.RD), X29, PPCSTATE_OFF(msr)); + LDR(INDEX_UNSIGNED, gpr.R(inst.RD), PPC_REG, PPCSTATE_OFF(msr)); } void JitArm64::mcrf(UGeckoInstruction inst) @@ -79,8 +79,8 @@ void JitArm64::mcrf(UGeckoInstruction inst) { ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[inst.CRFS])); - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[inst.CRFD])); + LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFS])); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD])); gpr.Unlock(WA); } } @@ -91,7 +91,7 @@ void JitArm64::mfsr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.BindToRegister(inst.RD, false); - LDR(INDEX_UNSIGNED, gpr.R(inst.RD), X29, PPCSTATE_OFF(sr[inst.SR])); + LDR(INDEX_UNSIGNED, gpr.R(inst.RD), PPC_REG, PPCSTATE_OFF(sr[inst.SR])); } void JitArm64::mtsr(UGeckoInstruction inst) @@ -100,7 +100,7 @@ void JitArm64::mtsr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.BindToRegister(inst.RS, true); - STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(sr[inst.SR])); + STR(INDEX_UNSIGNED, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(sr[inst.SR])); } void JitArm64::mfsrin(UGeckoInstruction inst) @@ -116,7 +116,7 @@ void JitArm64::mfsrin(UGeckoInstruction inst) ARM64Reg RB = gpr.R(b); UBFM(index, RB, 28, 31); - ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2)); + ADD(index64, PPC_REG, index64, ArithOption(index64, ST_LSL, 2)); LDR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0])); gpr.Unlock(index); @@ -135,7 +135,7 @@ void JitArm64::mtsrin(UGeckoInstruction inst) ARM64Reg RB = gpr.R(b); UBFM(index, RB, 28, 31); - ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2)); + ADD(index64, PPC_REG, index64, ArithOption(index64, ST_LSL, 2)); STR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0])); gpr.Unlock(index); @@ -193,14 +193,12 @@ void JitArm64::twx(UGeckoInstruction inst) gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); - LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); ORR(WA, WA, 24, 0); // Same as WA | EXCEPTION_PROGRAM - STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions)); + gpr.Unlock(WA); - MOVI2R(WA, js.compilerPC); - - // WA is unlocked in this function - WriteExceptionExit(WA); + WriteExceptionExit(js.compilerPC); SwitchToNearCode(); @@ -252,7 +250,7 @@ void JitArm64::mfspr(UGeckoInstruction inst) MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue); LDR(INDEX_UNSIGNED, XB, XB, 0); ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3)); - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(spr[SPR_TL])); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(spr[SPR_TL])); if (MergeAllowedNextInstructions(1)) { @@ -295,10 +293,10 @@ void JitArm64::mfspr(UGeckoInstruction inst) gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); ARM64Reg WA = gpr.GetReg(); - LDRH(INDEX_UNSIGNED, RD, X29, 
PPCSTATE_OFF(xer_stringctrl)); - LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + LDRH(INDEX_UNSIGNED, RD, PPC_REG, PPCSTATE_OFF(xer_stringctrl)); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_CA_SHIFT)); - LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov)); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_OV_SHIFT)); gpr.Unlock(WA); } @@ -309,7 +307,7 @@ void JitArm64::mfspr(UGeckoInstruction inst) default: gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); - LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4); + LDR(INDEX_UNSIGNED, RD, PPC_REG, PPCSTATE_OFF(spr) + iIndex * 4); break; } } @@ -359,11 +357,11 @@ void JitArm64::mtspr(UGeckoInstruction inst) ARM64Reg RD = gpr.R(inst.RD); ARM64Reg WA = gpr.GetReg(); AND(WA, RD, 24, 30); - STRH(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_stringctrl)); + STRH(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_stringctrl)); UBFM(WA, RD, XER_CA_SHIFT, XER_CA_SHIFT + 1); - STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); + STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); UBFM(WA, RD, XER_OV_SHIFT, 31); // Same as WA = RD >> XER_OV_SHIFT - STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov)); + STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); gpr.Unlock(WA); } break; @@ -373,7 +371,7 @@ void JitArm64::mtspr(UGeckoInstruction inst) // OK, this is easy. ARM64Reg RD = gpr.R(inst.RD); - STR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4); + STR(INDEX_UNSIGNED, RD, PPC_REG, PPCSTATE_OFF(spr) + iIndex * 4); } void JitArm64::crXXX(UGeckoInstruction inst) @@ -390,7 +388,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); + LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); switch (bit) { case CR_SO_BIT: @@ -409,7 +407,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62) break; } - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); gpr.Unlock(WA); return; } @@ -423,7 +421,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); + LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); if (bit != CR_GT_BIT) { @@ -456,7 +454,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) ORR(XA, XA, 32, 0, true); // XA | 1<<32 - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); gpr.Unlock(WA); return; } @@ -481,7 +479,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg WC = gpr.GetReg(); ARM64Reg XC = EncodeRegTo64(WC); - LDR(INDEX_UNSIGNED, XC, X29, PPCSTATE_OFF(cr_val) + 8 * field); + LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); switch (bit) { case CR_SO_BIT: // check bit 61 set @@ -538,7 +536,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) int field = inst.CRBD >> 2; int bit = 3 - (inst.CRBD & 3); - LDR(INDEX_UNSIGNED, XB, X29, PPCSTATE_OFF(cr_val) + 8 * field); + LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); // Gross but necessary; if the input is totally zero and we set SO or LT, // or even just add the (1<<32), GT will suddenly end up set without us @@ -576,7 +574,7 @@ void 
JitArm64::crXXX(UGeckoInstruction inst) } ORR(XA, XA, 32, 0, true); // XA | 1<<32 - STR(INDEX_UNSIGNED, XB, X29, PPCSTATE_OFF(cr_val) + 8 * field); + STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); gpr.Unlock(WA); gpr.Unlock(WB); @@ -627,7 +625,7 @@ void JitArm64::mtcrf(UGeckoInstruction inst) } LDR(XA, XB, ArithOption(XA, true)); - STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * i); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * i); } } gpr.Unlock(WA, WB); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 96b3f9046f..09fdd271ce 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -184,8 +184,8 @@ static GekkoOPTemplate table31[] = {1003, &JitArm64::FallBackToInterpreter}, // divwox {459, &JitArm64::divwux}, // divwux {971, &JitArm64::divwux}, // divwuox - {75, &JitArm64::FallBackToInterpreter}, // mulhwx - {11, &JitArm64::FallBackToInterpreter}, // mulhwux + {75, &JitArm64::mulhwx}, // mulhwx + {11, &JitArm64::mulhwux}, // mulhwux {235, &JitArm64::mullwx}, // mullwx {747, &JitArm64::mullwx}, // mullwox {104, &JitArm64::negx}, // negx diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 73f2862f6d..2a40b9fce7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -25,7 +25,12 @@ void JitArm64AsmRoutineManager::Generate() ABI_PushRegisters(regs_to_save); - MOVI2R(X29, (u64)&PowerPC::ppcState); + MOVI2R(PPC_REG, (u64)&PowerPC::ppcState); + MOVI2R(MEM_REG, (u64)Memory::logical_base); + + // Load the current PC into DISPATCHER_PC + LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); + FixupBranch to_dispatcher = B(); // If we align the dispatcher to a page then we can load its location with one ADRP instruction @@ -44,11 +49,10 @@ void JitArm64AsmRoutineManager::Generate() // This block of code gets the address of the compiled block of code // It runs though to the compiling portion if it isn't found - LDR(INDEX_UNSIGNED, W28, X29, PPCSTATE_OFF(pc)); // Load the current PC into W28 - BFM(W28, WSP, 3, 2); // Wipe the top 3 bits. Same as PC & JIT_ICACHE_MASK + BFM(DISPATCHER_PC, WSP, 3, 2); // Wipe the top 3 bits. Same as PC & JIT_ICACHE_MASK MOVI2R(X27, (u64)jit->GetBlockCache()->iCache.data()); - LDR(W27, X27, X28); + LDR(W27, X27, EncodeRegTo64(DISPATCHER_PC)); FixupBranch JitBlock = TBNZ(W27, 7); // Test the 7th bit // Success, it is our Jitblock. 
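table31 above now routes mulhwx and mulhwux to the new JIT handlers instead of the interpreter. Their required result is the high 32 bits of the signed or unsigned 64-bit product, which is exactly what the SMULL/UMULL followed by LSR #32 lowering in JitArm64_Integer.cpp produces. A reference model with a couple of sanity checks, illustrative only:

#include <cassert>
#include <cstdint>

static uint32_t mulhw_model(int32_t a, int32_t b)
{
  return static_cast<uint32_t>((static_cast<int64_t>(a) * static_cast<int64_t>(b)) >> 32);
}

static uint32_t mulhwu_model(uint32_t a, uint32_t b)
{
  return static_cast<uint32_t>((static_cast<uint64_t>(a) * static_cast<uint64_t>(b)) >> 32);
}

int main()
{
  assert(mulhw_model(-1, 1) == 0xFFFFFFFFu);                     // sign-extended high word
  assert(mulhwu_model(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
  assert(mulhwu_model(0x80000000u, 2u) == 1u);
  return 0;
}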
@@ -60,6 +64,8 @@ void JitArm64AsmRoutineManager::Generate() SetJumpTarget(JitBlock); + STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); + MOVI2R(X30, (u64)&Jit); BLR(X30); @@ -71,12 +77,14 @@ void JitArm64AsmRoutineManager::Generate() BLR(X30); // Does exception checking - LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc)); - STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(npc)); + LDR(INDEX_UNSIGNED, W0, PPC_REG, PPCSTATE_OFF(Exceptions)); + FixupBranch no_exceptions = CBZ(W0); + STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); + STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc)); MOVI2R(X30, (u64)&PowerPC::CheckExceptions); BLR(X30); - LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(npc)); - STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc)); + LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc)); + SetJumpTarget(no_exceptions); // Check the state pointer to see if we are exiting // Gets checked on every exception check @@ -89,6 +97,7 @@ void JitArm64AsmRoutineManager::Generate() B(dispatcher); SetJumpTarget(Exit); + STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); // Let the waiting thread know we are done leaving MOVI2R(X0, (u64)&PowerPC::FinishStateMove); @@ -583,7 +592,7 @@ void JitArm64AsmRoutineManager::GenMfcr() const u8* start = GetCodePtr(); for (int i = 0; i < 8; i++) { - LDR(INDEX_UNSIGNED, X1, X29, PPCSTATE_OFF(cr_val) + 8 * i); + LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * i); // SO if (i == 0)
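GenMfcr here and JumpIfCRFieldBit earlier in JitArm64_SystemRegisters.cpp both read CR fields from the 64-bit cr_val entries rather than from a packed 4-bit CR: SO is tested as bit 61, LT as bit 62, EQ as the low 32 bits being zero, and GT as the value being positive when treated as a signed 64-bit integer. A small host-side decoder mirroring those tests, for illustration only and not emitter output:

#include <cstdint>

struct CrFieldBits
{
  bool lt, gt, eq, so;
};

static CrFieldBits decode_cr_val(uint64_t cr_val)
{
  CrFieldBits f;
  f.so = (cr_val >> 61) & 1;              // TBNZ(XA, 61)
  f.eq = (cr_val & 0xFFFFFFFFull) == 0;   // CBZ(WA) on the low word
  f.gt = static_cast<int64_t>(cr_val) > 0;// CMP(XA, SP); B(CC_GT)
  f.lt = (cr_val >> 62) & 1;              // TBNZ(XA, 62)
  return f;
}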