From bb39ba1f3a76136a850da505f1b411e6c142bc0b Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Fri, 7 Aug 2015 22:11:38 -0500
Subject: [PATCH] [AArch64] Banish slowmem operations to farcode.

This improves performance pretty much across the board for games. The
increase in performance comes mainly from moving some code (the pushing
and popping of millions of registers) out of the main JIT blocks and
into farcode, where it doesn't pollute the icache.
---
 Source/Core/Core/PowerPC/JitArm64/Jit.cpp     |  13 +-
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  36 ++-
 .../PowerPC/JitArm64/JitArm64_BackPatch.cpp   | 247 ++++++++++--------
 .../PowerPC/JitArm64/JitArm64_LoadStore.cpp   |  29 +-
 .../JitArm64/JitArm64_LoadStoreFloating.cpp   |  34 +--
 5 files changed, 197 insertions(+), 162 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 9836e0b70a..a4a6d62a83 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -14,10 +14,13 @@ using namespace Arm64Gen;
 
+static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
+
 void JitArm64::Init()
 {
-	AllocCodeSpace(CODE_SIZE);
-	farcode.Init(SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE);
+	size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : AARCH64_FARCODE_SIZE;
+	AllocCodeSpace(CODE_SIZE + child_code_size);
+	AddChildCodeSpace(&farcode, child_code_size);
 	jo.enableBlocklink = true;
 	jo.optimizeGatherPipe = true;
 	UpdateMemoryOptions();
@@ -36,16 +39,18 @@ void JitArm64::Init()
 
 void JitArm64::ClearCache()
 {
+	m_fault_to_handler.clear();
+	m_handler_to_loc.clear();
+
+	blocks.Clear();
 	ClearCodeSpace();
 	farcode.ClearCodeSpace();
-	blocks.Clear();
 	UpdateMemoryOptions();
 }
 
 void JitArm64::Shutdown()
 {
 	FreeCodeSpace();
-	farcode.Shutdown();
 	blocks.Shutdown();
 	asm_routines.Shutdown();
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 476af4c3d8..de784b8f61 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -18,15 +18,6 @@
 
 #define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
 
-// A place to throw blocks of code we don't want polluting the cache, e.g. rarely taken
-// exception branches.
-class FarCodeCacheArm64 : public Arm64Gen::ARM64CodeBlock
-{
-public:
-	void Init(int size) { AllocCodeSpace(size); }
-	void Shutdown() { FreeCodeSpace(); }
-};
-
 // Some asserts to make sure we will be able to load everything
 static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
 static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
@@ -184,6 +175,27 @@ public:
 	void psq_st(UGeckoInstruction inst);
 
 private:
+
+	struct SlowmemHandler
+	{
+		ARM64Reg dest_reg;
+		ARM64Reg addr_reg;
+		BitSet32 gprs;
+		BitSet32 fprs;
+		u32 flags;
+		bool operator< (const SlowmemHandler& rhs) const
+		{
+			return !(dest_reg == rhs.dest_reg &&
+			         addr_reg == rhs.addr_reg &&
+			         gprs == rhs.gprs &&
+			         fprs == rhs.fprs &&
+			         flags == rhs.flags);
+		}
+	};
+
+	// <Fastmem fault location, slowmem handler location>
+	std::map<const u8*, std::pair<SlowmemHandler, const u8*>> m_fault_to_handler;
+	std::map<SlowmemHandler, const u8*> m_handler_to_loc;
 
 	Arm64GPRCache gpr;
 	Arm64FPRCache fpr;
@@ -194,7 +206,7 @@ private:
 
 	ARM64FloatEmitter m_float_emit;
 
-	FarCodeCacheArm64 farcode;
+	Arm64Gen::ARM64CodeBlock farcode;
 	u8* nearcode; // Backed up when we switch to far code.
 
 	// Simple functions to switch between near and far code emitting
@@ -219,7 +231,9 @@ private:
 	// Backpatching routines
 	bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg);
 	void InitBackpatch();
-	u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr);
+	u32 EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
+	                         Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr,
+	                         BitSet32 gprs_to_push = BitSet32(0), BitSet32 fprs_to_push = BitSet32(0));
 	// Loadstore routines
 	void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
 	void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 83605bb07f..303f180e7f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -127,10 +127,14 @@ bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
 	return false;
 }
 
-u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARM64Reg RS, ARM64Reg addr)
+u32 JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
+                                   ARM64Reg RS, ARM64Reg addr,
+                                   BitSet32 gprs_to_push, BitSet32 fprs_to_push)
 {
+	bool in_far_code = false;
 	u32 trouble_offset = 0;
-	const u8* code_base = emit->GetCodePtr();
+	const u8* trouble_location = nullptr;
+	const u8* code_base = GetCodePtr();
 
 	if (fastmem)
 	{
@@ -140,169 +144,199 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
 		if (flags & BackPatchInfo::FLAG_STORE &&
 		    flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
 		{
-			ARM64FloatEmitter float_emit(emit);
 			if (flags & BackPatchInfo::FLAG_SIZE_F32)
 			{
-				float_emit.FCVT(32, 64, D0, RS);
-				float_emit.REV32(8, D0, D0);
-				trouble_offset = (emit->GetCodePtr() - code_base) / 4;
-				float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
+				m_float_emit.FCVT(32, 64, D0, RS);
+				m_float_emit.REV32(8, D0, D0);
+				trouble_offset = (GetCodePtr() - code_base) / 4;
+				trouble_location = GetCodePtr();
+				m_float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
 			}
 			else
 			{
-				float_emit.REV64(8, Q0, RS);
-				trouble_offset = (emit->GetCodePtr() - code_base) / 4;
-				float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
+				m_float_emit.REV64(8, Q0, RS);
+				trouble_offset = (GetCodePtr() - code_base) / 4;
+				trouble_location = GetCodePtr();
+				m_float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
 			}
 		}
 		else if (flags & BackPatchInfo::FLAG_LOAD &&
 		         flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
 		{
-			ARM64FloatEmitter float_emit(emit);
-			trouble_offset = (emit->GetCodePtr() - code_base) / 4;
+			trouble_offset = (GetCodePtr() - code_base) / 4;
+			trouble_location = GetCodePtr();
 			if (flags & BackPatchInfo::FLAG_SIZE_F32)
 			{
-				float_emit.LD1R(32, EncodeRegToDouble(RS), addr);
-				float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
-				float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
+				m_float_emit.LD1R(32, EncodeRegToDouble(RS), addr);
+				m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
+				m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
 			}
 			else
 			{
-				float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
-				float_emit.REV64(8, D0, D0);
-				float_emit.INS(64, RS, 0, Q0, 0);
+				m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
+				m_float_emit.REV64(8, D0, D0);
+				m_float_emit.INS(64, RS, 0, Q0, 0);
 			}
 		}
 		else if (flags & BackPatchInfo::FLAG_STORE)
 		{
 			ARM64Reg temp = W0;
 			if (flags & BackPatchInfo::FLAG_SIZE_32)
-				emit->REV32(temp, RS);
+				REV32(temp, RS);
 			else if (flags & BackPatchInfo::FLAG_SIZE_16)
-				emit->REV16(temp, RS);
+				REV16(temp, RS);
 
-			trouble_offset = (emit->GetCodePtr() - code_base) / 4;
+			trouble_offset = (GetCodePtr() - code_base) / 4;
+			trouble_location = GetCodePtr();
 
 			if (flags & BackPatchInfo::FLAG_SIZE_32)
-				emit->STR(INDEX_UNSIGNED, temp, addr, 0);
+				STR(INDEX_UNSIGNED, temp, addr, 0);
 			else if (flags & BackPatchInfo::FLAG_SIZE_16)
-				emit->STRH(INDEX_UNSIGNED, temp, addr, 0);
+				STRH(INDEX_UNSIGNED, temp, addr, 0);
 			else
-				emit->STRB(INDEX_UNSIGNED, RS, addr, 0);
+				STRB(INDEX_UNSIGNED, RS, addr, 0);
 		}
 		else
 		{
-			trouble_offset = (emit->GetCodePtr() - code_base) / 4;
+			trouble_offset = (GetCodePtr() - code_base) / 4;
+			trouble_location = GetCodePtr();
 
 			if (flags & BackPatchInfo::FLAG_SIZE_32)
-				emit->LDR(INDEX_UNSIGNED, RS, addr, 0);
+				LDR(INDEX_UNSIGNED, RS, addr, 0);
 			else if (flags & BackPatchInfo::FLAG_SIZE_16)
-				emit->LDRH(INDEX_UNSIGNED, RS, addr, 0);
+				LDRH(INDEX_UNSIGNED, RS, addr, 0);
 			else if (flags & BackPatchInfo::FLAG_SIZE_8)
-				emit->LDRB(INDEX_UNSIGNED, RS, addr, 0);
+				LDRB(INDEX_UNSIGNED, RS, addr, 0);
 
 			if (!(flags & BackPatchInfo::FLAG_REVERSE))
 			{
 				if (flags & BackPatchInfo::FLAG_SIZE_32)
-					emit->REV32(RS, RS);
+					REV32(RS, RS);
 				else if (flags & BackPatchInfo::FLAG_SIZE_16)
-					emit->REV16(RS, RS);
+					REV16(RS, RS);
 			}
 
 			if (flags & BackPatchInfo::FLAG_EXTEND)
-				emit->SXTH(RS, RS);
+				SXTH(RS, RS);
 		}
 	}
-	else
+
+	if (!fastmem || do_farcode)
 	{
-		if (flags & BackPatchInfo::FLAG_STORE &&
-		    flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
+		if (fastmem && do_farcode)
 		{
-			ARM64FloatEmitter float_emit(emit);
-			if (flags & BackPatchInfo::FLAG_SIZE_F32)
+			SlowmemHandler handler;
+			handler.dest_reg = RS;
+			handler.addr_reg = addr;
+			handler.gprs = gprs_to_push;
+			handler.fprs = fprs_to_push;
+			handler.flags = flags;
+
+			std::map<SlowmemHandler, const u8*>::iterator handler_loc_iter;
+			handler_loc_iter = m_handler_to_loc.find(handler);
+
+			if (handler_loc_iter == m_handler_to_loc.end())
 			{
-				float_emit.FCVT(32, 64, D0, RS);
-				float_emit.UMOV(32, W0, Q0, 0);
-				emit->MOVI2R(X30, (u64)&PowerPC::Write_U32);
-				emit->BLR(X30);
+				in_far_code = true;
+				SwitchToFarCode();
+				const u8* handler_loc = GetCodePtr();
+				m_handler_to_loc[handler] = handler_loc;
+				m_fault_to_handler[trouble_location] = std::make_pair(handler, handler_loc);
 			}
 			else
 			{
-				emit->MOVI2R(X30, (u64)&PowerPC::Write_U64);
-				float_emit.UMOV(64, X0, RS, 0);
-				emit->BLR(X30);
+				const u8* handler_loc = handler_loc_iter->second;
+				m_fault_to_handler[trouble_location] = std::make_pair(handler, handler_loc);
+				return trouble_offset;
+			}
+		}
+
+		ABI_PushRegisters(gprs_to_push);
+		m_float_emit.ABI_PushRegisters(fprs_to_push, X30);
+
+		if (flags & BackPatchInfo::FLAG_STORE &&
+		    flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
+		{
+			if (flags & BackPatchInfo::FLAG_SIZE_F32)
+			{
+				m_float_emit.FCVT(32, 64, D0, RS);
+				m_float_emit.UMOV(32, W0, Q0, 0);
+				MOVI2R(X30, (u64)&PowerPC::Write_U32);
+				BLR(X30);
+			}
+			else
+			{
+				MOVI2R(X30, (u64)&PowerPC::Write_U64);
+				m_float_emit.UMOV(64, X0, RS, 0);
+				BLR(X30);
 			}
 		}
 		else if (flags & BackPatchInfo::FLAG_LOAD &&
 		         flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
 		{
-			ARM64FloatEmitter float_emit(emit);
 			if (flags & BackPatchInfo::FLAG_SIZE_F32)
 			{
-				emit->MOVI2R(X30, (u64)&PowerPC::Read_U32);
-				emit->BLR(X30);
-				float_emit.DUP(32, RS, X0);
-				float_emit.FCVTL(64, RS, RS);
+				MOVI2R(X30, (u64)&PowerPC::Read_U32);
+				BLR(X30);
+				m_float_emit.DUP(32, RS, X0);
+				m_float_emit.FCVTL(64, RS, RS);
 			}
 			else
 			{
-				emit->MOVI2R(X30, (u64)&PowerPC::Read_F64);
-				emit->BLR(X30);
-				float_emit.INS(64, RS, 0, X0);
+				MOVI2R(X30, (u64)&PowerPC::Read_F64);
+				BLR(X30);
+				m_float_emit.INS(64, RS, 0, X0);
 			}
 		}
 		else if (flags & BackPatchInfo::FLAG_STORE)
 		{
-			emit->MOV(W0, RS);
+			MOV(W0, RS);
 
 			if (flags & BackPatchInfo::FLAG_SIZE_32)
-				emit->MOVI2R(X30, (u64)&PowerPC::Write_U32);
+				MOVI2R(X30, (u64)&PowerPC::Write_U32);
 			else if (flags & BackPatchInfo::FLAG_SIZE_16)
-				emit->MOVI2R(X30, (u64)&PowerPC::Write_U16);
+				MOVI2R(X30, (u64)&PowerPC::Write_U16);
 			else
-				emit->MOVI2R(X30, (u64)&PowerPC::Write_U8);
+				MOVI2R(X30, (u64)&PowerPC::Write_U8);
 
-			emit->BLR(X30);
+			BLR(X30);
 		}
 		else
 		{
 			if (flags & BackPatchInfo::FLAG_SIZE_32)
-				emit->MOVI2R(X30, (u64)&PowerPC::Read_U32);
+				MOVI2R(X30, (u64)&PowerPC::Read_U32);
 			else if (flags & BackPatchInfo::FLAG_SIZE_16)
-				emit->MOVI2R(X30, (u64)&PowerPC::Read_U16);
+				MOVI2R(X30, (u64)&PowerPC::Read_U16);
 			else if (flags & BackPatchInfo::FLAG_SIZE_8)
-				emit->MOVI2R(X30, (u64)&PowerPC::Read_U8);
+				MOVI2R(X30, (u64)&PowerPC::Read_U8);
 
-			emit->BLR(X30);
+			BLR(X30);
 
 			if (!(flags & BackPatchInfo::FLAG_REVERSE))
 			{
-				emit->MOV(RS, W0);
+				MOV(RS, W0);
 			}
 			else
 			{
 				if (flags & BackPatchInfo::FLAG_SIZE_32)
-					emit->REV32(RS, W0);
+					REV32(RS, W0);
 				else if (flags & BackPatchInfo::FLAG_SIZE_16)
-					emit->REV16(RS, W0);
+					REV16(RS, W0);
 			}
 
 			if (flags & BackPatchInfo::FLAG_EXTEND)
-				emit->SXTH(RS, RS);
+				SXTH(RS, RS);
 		}
+
+		m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
+		ABI_PopRegisters(gprs_to_push);
 	}
 
-	if (do_padding)
+	if (in_far_code)
 	{
-		BackPatchInfo& info = m_backpatch_info[flags];
-		u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size);
-
-		u32 code_size = emit->GetCodePtr() - code_base;
-		code_size /= 4;
-
-		for (u32 i = 0; i < (num_insts_max - code_size); ++i)
-			emit->HINT(HINT_NOP);
+		RET(X30);
+		SwitchToNearCode();
 	}
 
 	return trouble_offset;
@@ -338,13 +372,22 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
 		return false;
 	}
 
+	std::map<const u8*, std::pair<SlowmemHandler, const u8*>>::iterator slow_handler_iter = m_fault_to_handler.find((const u8*)ctx->CTX_PC);
+
 	BackPatchInfo& info = m_backpatch_info[flags];
 	ARM64XEmitter emitter((u8*)(ctx->CTX_PC - info.m_fastmem_trouble_inst_offset * 4));
 	u64 new_pc = (u64)emitter.GetCodePtr();
 
-	// Slowmem routine doesn't need the address location
-	// It is already in the correct location
-	EmitBackpatchRoutine(&emitter, flags, false, true, reg, INVALID_REG);
+	{
+		u32 num_insts_max = info.m_fastmem_size - 1;
+
+		for (u32 i = 0; i < num_insts_max; ++i)
+			emitter.HINT(HINT_NOP);
+
+		emitter.BL(slow_handler_iter->second.second);
+
+		m_fault_to_handler.erase(slow_handler_iter);
+	}
 
 	emitter.FlushIcache();
 	ctx->CTX_PC = new_pc;
@@ -372,14 +415,14 @@ void JitArm64::InitBackpatch()
 		flags = BackPatchInfo::FLAG_LOAD |
 		        BackPatchInfo::FLAG_SIZE_8;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -392,14 +435,14 @@ void JitArm64::InitBackpatch()
 		flags = BackPatchInfo::FLAG_LOAD |
 		        BackPatchInfo::FLAG_SIZE_16;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -412,14 +455,14 @@
 		flags = BackPatchInfo::FLAG_LOAD |
 		        BackPatchInfo::FLAG_SIZE_32;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -433,14 +476,14 @@
 			BackPatchInfo::FLAG_LOAD |
 			BackPatchInfo::FLAG_SIZE_16 |
 			BackPatchInfo::FLAG_EXTEND;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -454,14 +497,14 @@
 			BackPatchInfo::FLAG_LOAD |
 			BackPatchInfo::FLAG_SIZE_16 |
 			BackPatchInfo::FLAG_REVERSE;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -475,14 +518,14 @@
 			BackPatchInfo::FLAG_LOAD |
 			BackPatchInfo::FLAG_SIZE_32 |
 			BackPatchInfo::FLAG_REVERSE;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -495,14 +538,14 @@
 		flags = BackPatchInfo::FLAG_LOAD |
 		        BackPatchInfo::FLAG_SIZE_F32;
 
-		EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
+		EmitBackpatchRoutine(flags, false, false, Q0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
+			EmitBackpatchRoutine(flags, true, false, Q0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -515,14 +558,14 @@
 		flags = BackPatchInfo::FLAG_LOAD |
 		        BackPatchInfo::FLAG_SIZE_F64;
 
-		EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
+		EmitBackpatchRoutine(flags, false, false, Q0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
+			EmitBackpatchRoutine(flags, true, false, Q0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -539,14 +582,14 @@
 		flags = BackPatchInfo::FLAG_STORE |
 		        BackPatchInfo::FLAG_SIZE_8;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -559,14 +602,14 @@
 		flags = BackPatchInfo::FLAG_STORE |
 		        BackPatchInfo::FLAG_SIZE_16;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -579,14 +622,14 @@
 		flags = BackPatchInfo::FLAG_STORE |
 		        BackPatchInfo::FLAG_SIZE_32;
 
-		EmitBackpatchRoutine(this, flags, false, false, W0, X1);
+		EmitBackpatchRoutine(flags, false, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, W0, X1);
+			EmitBackpatchRoutine(flags, true, false, W0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -599,14 +642,14 @@
 		flags = BackPatchInfo::FLAG_STORE |
 		        BackPatchInfo::FLAG_SIZE_F32;
 
-		EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
+		EmitBackpatchRoutine(flags, false, false, Q0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
+			EmitBackpatchRoutine(flags, true, false, Q0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
 
@@ -619,14 +662,14 @@
 		flags = BackPatchInfo::FLAG_STORE |
 		        BackPatchInfo::FLAG_SIZE_F64;
 
-		EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
+		EmitBackpatchRoutine(flags, false, false, Q0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_slowmem_size = (code_end - code_base) / 4;
 
 		SetCodePtr(code_base);
 		info.m_fastmem_trouble_inst_offset =
-			EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
+			EmitBackpatchRoutine(flags, true, false, Q0, X1);
 		code_end = GetWritableCodePtr();
 		info.m_fastmem_size = (code_end - code_base) / 4;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 133ff3e936..ef810edaa1 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -37,7 +37,6 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
 	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
 	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 	regs_in_use[W0] = 0;
-	regs_in_use[W30] = 0;
 	regs_in_use[dest_reg] = 0;
 
 	ARM64Reg addr_reg = W0;
@@ -148,7 +147,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
 
 	if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
 	{
-		EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);
+		EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0));
 	}
 	else if (mmio_address)
 	{
@@ -158,16 +157,11 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
 	}
 	else
 	{
-		// Has a chance of being backpatched which will destroy our state
-		// push and pop everything in this instance
-		ABI_PushRegisters(regs_in_use);
-		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-		EmitBackpatchRoutine(this, flags,
+		EmitBackpatchRoutine(flags,
 		                     jo.fastmem,
 		                     jo.fastmem,
-		                     dest_reg, XA);
-		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PopRegisters(regs_in_use);
+		                     dest_reg, XA,
+		                     regs_in_use, fprs_in_use);
 	}
 
 	gpr.Unlock(W0, W30);
@@ -192,7 +186,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
 	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 	regs_in_use[W0] = 0;
 	regs_in_use[W1] = 0;
-	regs_in_use[W30] = 0;
 
 	ARM64Reg addr_reg = W1;
@@ -296,7 +289,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
 	{
 		MOVI2R(XA, imm_addr);
 
-		EmitBackpatchRoutine(this, flags, true, false, RS, XA);
+		EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0));
 	}
 	else if (mmio_address && !(flags & BackPatchInfo::FLAG_REVERSE))
 	{
@@ -309,16 +302,12 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
 		if (is_immediate)
 			MOVI2R(XA, imm_addr);
 
-		// Has a chance of being backpatched which will destroy our state
-		// push and pop everything in this instance
-		ABI_PushRegisters(regs_in_use);
-		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-		EmitBackpatchRoutine(this, flags,
+		EmitBackpatchRoutine(flags,
 		                     jo.fastmem,
 		                     jo.fastmem,
-		                     RS, XA);
-		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PopRegisters(regs_in_use);
+		                     RS, XA,
+		                     regs_in_use,
+		                     fprs_in_use);
 	}
 
 	gpr.Unlock(W0, W1, W30);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
index da79ce823b..414b16650f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -183,26 +183,20 @@ void JitArm64::lfXX(UGeckoInstruction inst)
 	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
 	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 	regs_in_use[W0] = 0;
-	regs_in_use[W30] = 0;
 	fprs_in_use[0] = 0; // Q0
 	fprs_in_use[VD - Q0] = 0;
 
 	if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
 	{
-		EmitBackpatchRoutine(this, flags, true, false, VD, XA);
+		EmitBackpatchRoutine(flags, true, false, VD, XA, BitSet32(0), BitSet32(0));
 	}
 	else
 	{
-		// Has a chance of being backpatched which will destroy our state
-		// push and pop everything in this instance
-		ABI_PushRegisters(regs_in_use);
-		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-		EmitBackpatchRoutine(this, flags,
+		EmitBackpatchRoutine(flags,
 		                     jo.fastmem,
 		                     jo.fastmem,
-		                     VD, XA);
-		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PopRegisters(regs_in_use);
+		                     VD, XA,
+		                     regs_in_use, fprs_in_use);
 	}
 
 	gpr.Unlock(W0, W30);
@@ -383,7 +377,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 	regs_in_use[W0] = 0;
 	regs_in_use[W1] = 0;
-	regs_in_use[W30] = 0;
 	fprs_in_use[0] = 0; // Q0
 
 	if (is_immediate)
@@ -437,29 +430,20 @@
 		}
 		else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
 		{
-			EmitBackpatchRoutine(this, flags, true, false, V0, XA);
+			EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0));
 		}
 		else
 		{
-			ABI_PushRegisters(regs_in_use);
-			m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-			EmitBackpatchRoutine(this, flags, false, false, V0, XA);
-			m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-			ABI_PopRegisters(regs_in_use);
+			EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use);
 		}
 	}
 	else
 	{
-		// Has a chance of being backpatched which will destroy our state
-		// push and pop everything in this instance
-		ABI_PushRegisters(regs_in_use);
-		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-		EmitBackpatchRoutine(this, flags,
+		EmitBackpatchRoutine(flags,
 		                     jo.fastmem,
 		                     jo.fastmem,
-		                     V0, XA);
-		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PopRegisters(regs_in_use);
+		                     V0, XA,
+		                     regs_in_use, fprs_in_use);
 	}
 	gpr.Unlock(W0, W1, W30);
 	fpr.Unlock(Q0);
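
---

The heart of this patch is the pair of maps added to JitArm64: m_handler_to_loc deduplicates slowmem handlers by their SlowmemHandler description (destination register, address register, live GPR/FPR sets, flags), and m_fault_to_handler records, for each emitted fastmem access, which farcode handler its fault should be redirected to. The standalone C++ sketch below models only that bookkeeping, outside the emitter. Everything in it is illustrative: HandlerCache, RegisterFault, and TakeHandlerFor are invented names, registers are plain ints, BitSet32 is a u32, and "code locations" are addresses in dummy arrays. It also deliberately swaps in a std::tie-based operator<, because the comparator in the patch returns !(all fields equal), which reports both a < b and b < a for any unequal pair and so does not give the strict weak ordering std::map requires.

// Minimal sketch (not code from this patch) of the handler-cache bookkeeping.
#include <cstdint>
#include <iostream>
#include <map>
#include <tuple>
#include <utility>

using u8 = std::uint8_t;
using u32 = std::uint32_t;

struct SlowmemHandler
{
	int dest_reg;  // stand-in for ARM64Reg
	int addr_reg;  // stand-in for ARM64Reg
	u32 gprs;      // stand-in for BitSet32
	u32 fprs;      // stand-in for BitSet32
	u32 flags;

	// std::map requires a strict weak ordering; lexicographic comparison
	// through std::tie provides one.
	bool operator<(const SlowmemHandler& rhs) const
	{
		return std::tie(dest_reg, addr_reg, gprs, fprs, flags) <
		       std::tie(rhs.dest_reg, rhs.addr_reg, rhs.gprs, rhs.fprs, rhs.flags);
	}
};

class HandlerCache  // hypothetical wrapper around the two maps in Jit.h
{
public:
	// Emission time: a fastmem access at fault_loc wants handler `h`.
	// Returns true if the caller must switch to farcode and emit the handler
	// at next_farcode_loc; false if an existing handler is reused.
	bool RegisterFault(const u8* fault_loc, const SlowmemHandler& h,
	                   const u8* next_farcode_loc)
	{
		auto it = m_handler_to_loc.find(h);
		if (it == m_handler_to_loc.end())
		{
			m_handler_to_loc[h] = next_farcode_loc;
			m_fault_to_handler[fault_loc] = std::make_pair(h, next_farcode_loc);
			return true;
		}
		m_fault_to_handler[fault_loc] = std::make_pair(h, it->second);
		return false;
	}

	// Fault time: find the farcode location the faulting PC should branch to,
	// then drop the one-shot fault entry, mirroring HandleFault's erase().
	const u8* TakeHandlerFor(const u8* fault_pc)
	{
		auto it = m_fault_to_handler.find(fault_pc);
		if (it == m_fault_to_handler.end())
			return nullptr;
		const u8* loc = it->second.second;
		m_fault_to_handler.erase(it);
		return loc;
	}

private:
	std::map<SlowmemHandler, const u8*> m_handler_to_loc;
	std::map<const u8*, std::pair<SlowmemHandler, const u8*>> m_fault_to_handler;
};

int main()
{
	static const u8 fastmem[16] = {};  // pretend JIT block
	static const u8 farcode[16] = {};  // pretend farcode region
	HandlerCache cache;
	SlowmemHandler h = {0, 1, 0x3, 0x0, 0x20};

	// Two loads with identical register state share one farcode handler.
	std::cout << cache.RegisterFault(&fastmem[0], h, &farcode[0]) << '\n';  // 1
	std::cout << cache.RegisterFault(&fastmem[4], h, &farcode[8]) << '\n';  // 0
	// A fault at the second load resolves to the shared handler.
	std::cout << (cache.TakeHandlerFor(&fastmem[4]) == &farcode[0]) << '\n';  // 1
}

Run, this prints 1, 0, 1: the first access emits a new farcode handler, the second access with identical register state reuses it, and a fault at the second access resolves to the shared handler location. That sharing is what lets the backpatched near code shrink to NOP padding plus a single BL into farcode.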