From d64c3dc267b37e37055defc96a8de6982f104ea0 Mon Sep 17 00:00:00 2001
From: JosJuice <josjuice@gmail.com>
Date: Mon, 21 Nov 2022 22:39:15 +0100
Subject: [PATCH] Arm64Emitter: Add MOVPage2R utility function

This new function is like MOVP2R, except it masks out the lower 12 bits,
returning them instead of writing them to the register. These lower 12
bits can then be used as an offset for LDR/STR.

This lets us turn ADRP+ADD+LDR sequences with a zero offset into ADRP+LDR
sequences with a non-zero offset, saving one instruction.
---
 Source/Core/Common/Arm64Emitter.h            | 17 +++-
 Source/Core/Core/PowerPC/JitArm64/Jit.cpp    |  7 +-
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 78 +++++++++----------
 .../Core/Core/PowerPC/JitArm64/Jit_Util.cpp  | 36 ++++-----
 4 files changed, 72 insertions(+), 66 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 19d63a8dae..bd2c4822d5 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -1009,12 +1009,20 @@ public:
   void MOVP2R(ARM64Reg Rd, P* ptr)
   {
     ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
-    MOVI2R(Rd, (uintptr_t)ptr);
+    MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr));
+  }
+  // Given an address, stores the page address into a register and returns the page-relative offset
+  template <class P>
+  s32 MOVPage2R(ARM64Reg Rd, P* ptr)
+  {
+    ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
+    MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr) & ~0xFFFULL);
+    return static_cast<s32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFULL);
   }
-  // Wrapper around AND x, y, imm etc.
-  // If you are sure the imm will work, preferably construct a LogicalImm directly instead,
-  // since that is constexpr and thus can be done at compile-time for constant values.
+  // Wrappers around bitwise operations with an immediate. If you're sure an imm can be encoded
+  // without a scratch register, preferably construct a LogicalImm directly instead,
+  // since that is constexpr and thus can be done at compile time for constant values.
   void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
   {
@@ -1024,6 +1032,7 @@ public:
   void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
 
+  // Wrappers around arithmetic operations with an immediate.
   void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
                        ARM64Reg scratch);
   void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index a2aae7c2d3..4afd8ac92e 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -915,8 +915,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     SetJumpTarget(exception);
     LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(msr));
     TBZ(ARM64Reg::W30, 15, done_here);  // MSR.EE
-    MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause);
-    LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0);
+    LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30,
+        MOVPage2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause));
     constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
                                ProcessorInterface::INT_CAUSE_PE_TOKEN |
                                ProcessorInterface::INT_CAUSE_PE_FINISH;
@@ -951,8 +951,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     SetJumpTarget(exception);
     LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
     TBZ(WA, 15, done_here);  // MSR.EE
-    MOVP2R(XA, &ProcessorInterface::m_InterruptCause);
-    LDR(IndexType::Unsigned, WA, XA, 0);
+    LDR(IndexType::Unsigned, WA, XA, MOVPage2R(XA, &ProcessorInterface::m_InterruptCause));
     constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
                                ProcessorInterface::INT_CAUSE_PE_TOKEN |
                                ProcessorInterface::INT_CAUSE_PE_FINISH;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index a1c292afdf..05d3a880d7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -42,10 +42,9 @@ void JitArm64::GenerateAsm()
 
   // Swap the stack pointer, so we have proper guard pages.
   ADD(ARM64Reg::X0, ARM64Reg::SP, 0);
-  MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
-  STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
-  MOVP2R(ARM64Reg::X1, &m_stack_pointer);
-  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
+  STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
+      MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
+  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, MOVPage2R(ARM64Reg::X1, &m_stack_pointer));
   FixupBranch no_fake_stack = CBZ(ARM64Reg::X0);
   ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
   SetJumpTarget(no_fake_stack);
@@ -167,8 +166,7 @@ void JitArm64::GenerateAsm()
 
   // Check the state pointer to see if we are exiting
   // Gets checked on at the end of every slice
-  MOVP2R(ARM64Reg::X0, CPU::GetStatePtr());
-  LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, 0);
+  LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
   CMP(ARM64Reg::W0, 0);
   FixupBranch Exit = B(CC_NEQ);
 
@@ -186,8 +184,8 @@ void JitArm64::GenerateAsm()
   SetJumpTarget(Exit);
 
   // Reset the stack pointer, as the BLR optimization has touched it.
-  MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
-  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
+  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
+      MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
   ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
 
   m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30);
@@ -526,9 +524,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     RET(ARM64Reg::X30);
   }
@@ -544,9 +542,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     RET(ARM64Reg::X30);
   }
@@ -561,9 +559,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     RET(ARM64Reg::X30);
   }
@@ -578,9 +576,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     RET(ARM64Reg::X30);
   }
@@ -607,9 +605,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     RET(ARM64Reg::X30);
   }
@@ -625,9 +623,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     RET(ARM64Reg::X30);
   }
@@ -642,9 +640,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     RET(ARM64Reg::X30);
   }
@@ -659,9 +657,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     RET(ARM64Reg::X30);
   }
@@ -727,9 +725,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedU8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -746,9 +744,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedS8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -765,9 +763,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedU16 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -783,9 +781,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedS16 = GetCodePtr();  // Used by Viewtiful Joe's intro movie
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -812,9 +810,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleU8 = GetCodePtr();  // Used by MKWii
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
 
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -831,9 +829,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleS8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
 
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -850,9 +848,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleU16 = GetCodePtr();  // Used by MKWii
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
 
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -868,9 +866,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleS16 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
 
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
index 1a76814162..1674687a5f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
@@ -34,18 +34,18 @@ public:
   }
 
 private:
-  void StoreFromRegister(int sbits, ARM64Reg reg)
+  void StoreFromRegister(int sbits, ARM64Reg reg, s32 offset)
   {
     switch (sbits)
     {
     case 8:
-      m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     case 16:
-      m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     case 32:
-      m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     default:
       ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOWriteCodeGenerator!", sbits);
@@ -55,20 +55,20 @@ private:
 
   void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
   {
-    m_emit->MOVP2R(ARM64Reg::X0, ptr);
+    const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
 
     // If we do not need to mask, we can do the sign extend while loading
     // from memory. If masking is required, we have to first zero extend,
     // then mask, then sign extend if needed (1 instr vs. ~4).
-    u32 all_ones = (1ULL << sbits) - 1;
+    const u32 all_ones = (1ULL << sbits) - 1;
     if ((all_ones & mask) == all_ones)
     {
-      StoreFromRegister(sbits, m_src_reg);
+      StoreFromRegister(sbits, m_src_reg, offset);
     }
     else
    {
       m_emit->ANDI2R(ARM64Reg::W1, m_src_reg, mask, ARM64Reg::W1);
-      StoreFromRegister(sbits, ARM64Reg::W1);
+      StoreFromRegister(sbits, ARM64Reg::W1, offset);
     }
   }
 
@@ -123,24 +123,24 @@ private:
       m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
   }
 
-  void LoadToRegister(int sbits, bool dont_extend)
+  void LoadToRegister(int sbits, bool dont_extend, s32 offset)
   {
     switch (sbits)
     {
     case 8:
       if (m_sign_extend && !dont_extend)
-        m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       else
-        m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       break;
     case 16:
       if (m_sign_extend && !dont_extend)
-        m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       else
-        m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       break;
     case 32:
-      m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+      m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       break;
     default:
       ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOReadCodeGenerator!", sbits);
@@ -150,19 +150,19 @@ private:
 
   void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
   {
-    m_emit->MOVP2R(ARM64Reg::X0, ptr);
+    const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
 
     // If we do not need to mask, we can do the sign extend while loading
     // from memory. If masking is required, we have to first zero extend,
     // then mask, then sign extend if needed (1 instr vs. ~4).
-    u32 all_ones = (1ULL << sbits) - 1;
+    const u32 all_ones = (1ULL << sbits) - 1;
     if ((all_ones & mask) == all_ones)
     {
-      LoadToRegister(sbits, false);
+      LoadToRegister(sbits, false, offset);
     }
     else
     {
-      LoadToRegister(sbits, true);
+      LoadToRegister(sbits, true, offset);
       m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, ARM64Reg::W0);
       if (m_sign_extend)
        m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
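
As an aside, here is a minimal standalone sketch of the address split that
MOVPage2R performs, assuming the 4 KiB page granularity encoded by the masks
above. It is not part of the patch, and the helper name split_page and the
test global are invented for illustration. The point it demonstrates: ADRP
materializes a 4 KiB-aligned page address in a single instruction, and the
unsigned-offset forms of LDR/STR carry an immediate (scaled by the access
size, so the target must be naturally aligned, as the globals used above
are), which lets the low 12 bits ride along in the load/store itself rather
than requiring a separate ADD.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirror of MOVPage2R's arithmetic: mask the pointer down to its 4 KiB page
// (the value that MOVI2R would place in the register) and return the low
// 12 bits (the value the caller folds into the LDR/STR immediate).
static int32_t split_page(const void* ptr, uintptr_t* page)
{
  const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
  *page = addr & ~static_cast<uintptr_t>(0xFFF);
  return static_cast<int32_t>(addr & 0xFFF);
}

int main()
{
  static uint32_t some_global = 0;  // stand-in for a global such as m_InterruptCause
  uintptr_t page = 0;
  const int32_t offset = split_page(&some_global, &page);

  // The page base plus the offset must reassemble the original address;
  // that is exactly why ADRP followed by LDR Wt, [Xn, #offset] reads the
  // right word.
  assert(page + static_cast<uintptr_t>(offset) == reinterpret_cast<uintptr_t>(&some_global));
  std::printf("page = %#llx, offset = %#x\n", static_cast<unsigned long long>(page),
              static_cast<unsigned>(offset));
  return 0;
}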