diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index d37a30b9c5..edb6c3ede8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -14,6 +14,7 @@ #include "Core/HW/Memmap.h" #include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/Jit_Util.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" #include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PowerPC.h" @@ -91,17 +92,14 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR else if (flags & BackPatchInfo::FLAG_STORE) { ARM64Reg temp = ARM64Reg::W0; - if (flags & BackPatchInfo::FLAG_SIZE_32) - REV32(temp, RS); - else if (flags & BackPatchInfo::FLAG_SIZE_16) - REV16(temp, RS); + temp = ByteswapBeforeStore(this, temp, RS, flags, true); if (flags & BackPatchInfo::FLAG_SIZE_32) STR(temp, MEM_REG, addr); else if (flags & BackPatchInfo::FLAG_SIZE_16) STRH(temp, MEM_REG, addr); else - STRB(RS, MEM_REG, addr); + STRB(temp, MEM_REG, addr); } else if (flags & BackPatchInfo::FLAG_ZERO_256) { @@ -119,16 +117,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR else if (flags & BackPatchInfo::FLAG_SIZE_8) LDRB(RS, MEM_REG, addr); - if (!(flags & BackPatchInfo::FLAG_REVERSE)) - { - if (flags & BackPatchInfo::FLAG_SIZE_32) - REV32(RS, RS); - else if (flags & BackPatchInfo::FLAG_SIZE_16) - REV16(RS, RS); - } - - if (flags & BackPatchInfo::FLAG_EXTEND) - SXTH(RS, RS); + ByteswapAfterLoad(this, RS, RS, flags, true, false); } } const u8* fastmem_end = GetCodePtr(); @@ -207,7 +196,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR } else if (flags & BackPatchInfo::FLAG_STORE) { - MOV(ARM64Reg::W0, RS); + ARM64Reg temp = ARM64Reg::W0; + temp = ByteswapBeforeStore(this, temp, RS, flags, false); + if (temp != ARM64Reg::W0) + MOV(ARM64Reg::W0, temp); if (flags & BackPatchInfo::FLAG_SIZE_32) MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32); @@ -234,20 +226,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR BLR(ARM64Reg::X8); - if (!(flags & BackPatchInfo::FLAG_REVERSE)) - { - MOV(RS, ARM64Reg::W0); - } - else - { - if (flags & BackPatchInfo::FLAG_SIZE_32) - REV32(RS, ARM64Reg::W0); - else if (flags & BackPatchInfo::FLAG_SIZE_16) - REV16(RS, ARM64Reg::W0); - } - - if (flags & BackPatchInfo::FLAG_EXTEND) - SXTH(RS, RS); + ByteswapAfterLoad(this, RS, ARM64Reg::W0, flags, false, false); } m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index d0e2f09024..574d5fd9dd 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -238,21 +238,19 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s accessSize = 8; LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); + + ARM64Reg temp = ARM64Reg::W1; + temp = ByteswapBeforeStore(this, temp, RS, flags, true); + if (accessSize == 32) - { - REV32(ARM64Reg::W1, RS); - STR(IndexType::Post, ARM64Reg::W1, ARM64Reg::X0, 4); - } + STR(IndexType::Post, temp, ARM64Reg::X0, 4); else if (accessSize == 16) - { - REV16(ARM64Reg::W1, RS); - STRH(IndexType::Post, ARM64Reg::W1, ARM64Reg::X0, 2); - } + STRH(IndexType::Post, temp, ARM64Reg::X0, 2); else - { - STRB(IndexType::Post, RS, ARM64Reg::X0, 1); - } + STRB(IndexType::Post, temp, ARM64Reg::X0, 1); + STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); + js.fifoBytesSinceCheck += accessSize >> 3; } else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)) @@ -260,7 +258,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s MOVI2R(XA, imm_addr); EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0)); } - else if (mmio_address && !(flags & BackPatchInfo::FLAG_REVERSE)) + else if (mmio_address) { MMIOWriteRegToAddr(Memory::mmio_mapping.get(), this, regs_in_use, fprs_in_use, RS, mmio_address, flags); @@ -369,6 +367,7 @@ void JitArm64::stX(UGeckoInstruction inst) switch (inst.OPCD) { case 31: + regOffset = b; switch (inst.SUBOP10) { case 183: // stwux @@ -376,21 +375,24 @@ void JitArm64::stX(UGeckoInstruction inst) [[fallthrough]]; case 151: // stwx flags |= BackPatchInfo::FLAG_SIZE_32; - regOffset = b; break; case 247: // stbux update = true; [[fallthrough]]; case 215: // stbx flags |= BackPatchInfo::FLAG_SIZE_8; - regOffset = b; break; case 439: // sthux update = true; [[fallthrough]]; case 407: // sthx flags |= BackPatchInfo::FLAG_SIZE_16; - regOffset = b; + break; + case 662: // stwbrx + flags |= BackPatchInfo::FLAG_REVERSE | BackPatchInfo::FLAG_SIZE_32; + break; + case 918: // sthbrx + flags |= BackPatchInfo::FLAG_REVERSE | BackPatchInfo::FLAG_SIZE_16; break; } break; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 03612c00f2..2b1fd32a9c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -248,8 +248,8 @@ constexpr std::array table31{{ {247, &JitArm64::stX}, // stbux // store bytereverse - {662, &JitArm64::FallBackToInterpreter}, // stwbrx - {918, &JitArm64::FallBackToInterpreter}, // sthbrx + {662, &JitArm64::stX}, // stwbrx + {918, &JitArm64::stX}, // sthbrx {661, &JitArm64::FallBackToInterpreter}, // stswx {725, &JitArm64::FallBackToInterpreter}, // stswi diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp index 529161a75b..8c1237c061 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp @@ -191,6 +191,55 @@ private: bool m_sign_extend; }; +void ByteswapAfterLoad(ARM64XEmitter* emit, ARM64Reg dst_reg, ARM64Reg src_reg, u32 flags, + bool is_reversed, bool is_extended) +{ + if (is_reversed == !(flags & BackPatchInfo::FLAG_REVERSE)) + { + if (flags & BackPatchInfo::FLAG_SIZE_32) + { + emit->REV32(dst_reg, src_reg); + src_reg = dst_reg; + } + else if (flags & BackPatchInfo::FLAG_SIZE_16) + { + emit->REV16(dst_reg, src_reg); + src_reg = dst_reg; + } + } + + if (!is_extended && (flags & BackPatchInfo::FLAG_EXTEND)) + { + emit->SXTH(dst_reg, src_reg); + src_reg = dst_reg; + } + + if (dst_reg != src_reg) + emit->MOV(dst_reg, src_reg); +} + +ARM64Reg ByteswapBeforeStore(ARM64XEmitter* emit, ARM64Reg tmp_reg, ARM64Reg src_reg, u32 flags, + bool want_reversed) +{ + ARM64Reg dst_reg = src_reg; + + if (want_reversed == !(flags & BackPatchInfo::FLAG_REVERSE)) + { + if (flags & BackPatchInfo::FLAG_SIZE_32) + { + dst_reg = tmp_reg; + emit->REV32(dst_reg, src_reg); + } + else if (flags & BackPatchInfo::FLAG_SIZE_16) + { + dst_reg = tmp_reg; + emit->REV16(dst_reg, src_reg); + } + } + + return dst_reg; +} + void MMIOLoadToReg(MMIO::Mapping* mmio, Arm64Gen::ARM64XEmitter* emit, BitSet32 gprs_in_use, BitSet32 fprs_in_use, ARM64Reg dst_reg, u32 address, u32 flags) { @@ -212,11 +261,15 @@ void MMIOLoadToReg(MMIO::Mapping* mmio, Arm64Gen::ARM64XEmitter* emit, BitSet32 flags & BackPatchInfo::FLAG_EXTEND); mmio->GetHandlerForRead(address).Visit(gen); } + + ByteswapAfterLoad(emit, dst_reg, dst_reg, flags, false, true); } void MMIOWriteRegToAddr(MMIO::Mapping* mmio, Arm64Gen::ARM64XEmitter* emit, BitSet32 gprs_in_use, BitSet32 fprs_in_use, ARM64Reg src_reg, u32 address, u32 flags) { + src_reg = ByteswapBeforeStore(emit, ARM64Reg::W1, src_reg, flags, false); + if (flags & BackPatchInfo::FLAG_SIZE_8) { MMIOWriteCodeGenerator gen(emit, gprs_in_use, fprs_in_use, src_reg, address); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.h b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.h index 9698f96eab..47d4b5ce95 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.h @@ -8,6 +8,12 @@ #include "Core/HW/MMIO.h" +void ByteswapAfterLoad(Arm64Gen::ARM64XEmitter* emit, Arm64Gen::ARM64Reg dst_reg, + Arm64Gen::ARM64Reg src_reg, u32 flags, bool is_reversed, bool is_extended); + +Arm64Gen::ARM64Reg ByteswapBeforeStore(Arm64Gen::ARM64XEmitter* emit, Arm64Gen::ARM64Reg tmp_reg, + Arm64Gen::ARM64Reg src_reg, u32 flags, bool want_reversed); + void MMIOLoadToReg(MMIO::Mapping* mmio, Arm64Gen::ARM64XEmitter* emit, BitSet32 gprs_in_use, BitSet32 fprs_in_use, Arm64Gen::ARM64Reg dst_reg, u32 address, u32 flags);