Merge pull request #11294 from JosJuice/jitarm64-movpage2r

Arm64Emitter: Add MOVPage2R utility function
This commit is contained in:
Mai 2022-11-23 04:41:31 +00:00 committed by GitHub
commit e573a0bbc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 72 additions and 66 deletions

View File

@ -1009,12 +1009,20 @@ public:
void MOVP2R(ARM64Reg Rd, P* ptr) void MOVP2R(ARM64Reg Rd, P* ptr)
{ {
ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers"); ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
MOVI2R(Rd, (uintptr_t)ptr); MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr));
}
template <class P>
// Given an address, stores the page address into a register and returns the page-relative offset
s32 MOVPage2R(ARM64Reg Rd, P* ptr)
{
ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr) & ~0xFFFULL);
return static_cast<s32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFULL);
} }
// Wrapper around AND x, y, imm etc. // Wrappers around bitwise operations with an immediate. If you're sure an imm can be encoded
// If you are sure the imm will work, preferably construct a LogicalImm directly instead, // without a scratch register, preferably construct a LogicalImm directly instead,
// since that is constexpr and thus can be done at compile-time for constant values. // since that is constexpr and thus can be done at compile time for constant values.
void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch); void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch); void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
@ -1024,6 +1032,7 @@ public:
void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch); void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch); void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
// Wrappers around arithmetic operations with an immediate.
void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags, void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
ARM64Reg scratch); ARM64Reg scratch);
void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG); void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);

View File

@ -915,8 +915,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
SetJumpTarget(exception); SetJumpTarget(exception);
LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(msr)); LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(msr));
TBZ(ARM64Reg::W30, 15, done_here); // MSR.EE TBZ(ARM64Reg::W30, 15, done_here); // MSR.EE
MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause); LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30,
LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0); MOVPage2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause));
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP | constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH; ProcessorInterface::INT_CAUSE_PE_FINISH;
@ -951,8 +951,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
SetJumpTarget(exception); SetJumpTarget(exception);
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
TBZ(WA, 15, done_here); // MSR.EE TBZ(WA, 15, done_here); // MSR.EE
MOVP2R(XA, &ProcessorInterface::m_InterruptCause); LDR(IndexType::Unsigned, WA, XA, MOVPage2R(XA, &ProcessorInterface::m_InterruptCause));
LDR(IndexType::Unsigned, WA, XA, 0);
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP | constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH; ProcessorInterface::INT_CAUSE_PE_FINISH;

View File

@ -42,10 +42,9 @@ void JitArm64::GenerateAsm()
// Swap the stack pointer, so we have proper guard pages. // Swap the stack pointer, so we have proper guard pages.
ADD(ARM64Reg::X0, ARM64Reg::SP, 0); ADD(ARM64Reg::X0, ARM64Reg::SP, 0);
MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer); STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0); MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
MOVP2R(ARM64Reg::X1, &m_stack_pointer); LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, MOVPage2R(ARM64Reg::X1, &m_stack_pointer));
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
FixupBranch no_fake_stack = CBZ(ARM64Reg::X0); FixupBranch no_fake_stack = CBZ(ARM64Reg::X0);
ADD(ARM64Reg::SP, ARM64Reg::X0, 0); ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
SetJumpTarget(no_fake_stack); SetJumpTarget(no_fake_stack);
@ -167,8 +166,7 @@ void JitArm64::GenerateAsm()
// Check the state pointer to see if we are exiting // Check the state pointer to see if we are exiting
// Gets checked on at the end of every slice // Gets checked on at the end of every slice
MOVP2R(ARM64Reg::X0, CPU::GetStatePtr()); LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, 0);
CMP(ARM64Reg::W0, 0); CMP(ARM64Reg::W0, 0);
FixupBranch Exit = B(CC_NEQ); FixupBranch Exit = B(CC_NEQ);
@ -186,8 +184,8 @@ void JitArm64::GenerateAsm()
SetJumpTarget(Exit); SetJumpTarget(Exit);
// Reset the stack pointer, as the BLR optimization has touched it. // Reset the stack pointer, as the BLR optimization has touched it.
MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer); LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0); MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
ADD(ARM64Reg::SP, ARM64Reg::X0, 0); ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30); m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30);
@ -526,9 +524,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -544,9 +542,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -561,9 +559,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -578,9 +576,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -607,9 +605,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -625,9 +623,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -642,9 +640,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -659,9 +657,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -727,9 +725,9 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storePairedU8 = GetCodePtr(); const u8* storePairedU8 = GetCodePtr();
{ {
MOVP2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@ -746,9 +744,9 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storePairedS8 = GetCodePtr(); const u8* storePairedS8 = GetCodePtr();
{ {
MOVP2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@ -765,9 +763,9 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storePairedU16 = GetCodePtr(); const u8* storePairedU16 = GetCodePtr();
{ {
MOVP2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@ -783,9 +781,9 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie
{ {
MOVP2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@ -812,9 +810,9 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
{ {
MOVP2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@ -831,9 +829,9 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storeSingleS8 = GetCodePtr(); const u8* storeSingleS8 = GetCodePtr();
{ {
MOVP2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@ -850,9 +848,9 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
{ {
MOVP2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@ -868,9 +866,9 @@ void JitArm64::GenerateQuantizedStores()
} }
const u8* storeSingleS16 = GetCodePtr(); const u8* storeSingleS16 = GetCodePtr();
{ {
MOVP2R(ARM64Reg::X2, &m_quantizeTableS); const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3)); ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0); float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1); float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);

View File

@ -34,18 +34,18 @@ public:
} }
private: private:
void StoreFromRegister(int sbits, ARM64Reg reg) void StoreFromRegister(int sbits, ARM64Reg reg, s32 offset)
{ {
switch (sbits) switch (sbits)
{ {
case 8: case 8:
m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, 0); m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
break; break;
case 16: case 16:
m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, 0); m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
break; break;
case 32: case 32:
m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, 0); m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
break; break;
default: default:
ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOWriteCodeGenerator!", sbits); ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOWriteCodeGenerator!", sbits);
@ -55,20 +55,20 @@ private:
void WriteRegToAddr(int sbits, const void* ptr, u32 mask) void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
{ {
m_emit->MOVP2R(ARM64Reg::X0, ptr); const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
// If we do not need to mask, we can do the sign extend while loading // If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend, // from memory. If masking is required, we have to first zero extend,
// then mask, then sign extend if needed (1 instr vs. ~4). // then mask, then sign extend if needed (1 instr vs. ~4).
u32 all_ones = (1ULL << sbits) - 1; const u32 all_ones = (1ULL << sbits) - 1;
if ((all_ones & mask) == all_ones) if ((all_ones & mask) == all_ones)
{ {
StoreFromRegister(sbits, m_src_reg); StoreFromRegister(sbits, m_src_reg, offset);
} }
else else
{ {
m_emit->ANDI2R(ARM64Reg::W1, m_src_reg, mask, ARM64Reg::W1); m_emit->ANDI2R(ARM64Reg::W1, m_src_reg, mask, ARM64Reg::W1);
StoreFromRegister(sbits, ARM64Reg::W1); StoreFromRegister(sbits, ARM64Reg::W1, offset);
} }
} }
@ -123,24 +123,24 @@ private:
m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1); m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
} }
void LoadToRegister(int sbits, bool dont_extend) void LoadToRegister(int sbits, bool dont_extend, s32 offset)
{ {
switch (sbits) switch (sbits)
{ {
case 8: case 8:
if (m_sign_extend && !dont_extend) if (m_sign_extend && !dont_extend)
m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0); m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
else else
m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0); m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
break; break;
case 16: case 16:
if (m_sign_extend && !dont_extend) if (m_sign_extend && !dont_extend)
m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0); m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
else else
m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0); m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
break; break;
case 32: case 32:
m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0); m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
break; break;
default: default:
ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOReadCodeGenerator!", sbits); ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOReadCodeGenerator!", sbits);
@ -150,19 +150,19 @@ private:
void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask) void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
{ {
m_emit->MOVP2R(ARM64Reg::X0, ptr); const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
// If we do not need to mask, we can do the sign extend while loading // If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend, // from memory. If masking is required, we have to first zero extend,
// then mask, then sign extend if needed (1 instr vs. ~4). // then mask, then sign extend if needed (1 instr vs. ~4).
u32 all_ones = (1ULL << sbits) - 1; const u32 all_ones = (1ULL << sbits) - 1;
if ((all_ones & mask) == all_ones) if ((all_ones & mask) == all_ones)
{ {
LoadToRegister(sbits, false); LoadToRegister(sbits, false, offset);
} }
else else
{ {
LoadToRegister(sbits, true); LoadToRegister(sbits, true, offset);
m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, ARM64Reg::W0); m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, ARM64Reg::W0);
if (m_sign_extend) if (m_sign_extend)
m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1); m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);