diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index 296b9976c7..93a05fc144 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -1795,6 +1795,62 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s64 imm) EncodeAddressInst(1, Rd, static_cast(imm >> 12)); } +// This is using a hand-rolled algorithm. The goal is zero memory allocations, not necessarily +// the best JIT-time time complexity. (The number of moves is usually very small.) +void ARM64XEmitter::ParallelMoves(RegisterMove* begin, RegisterMove* end, + std::array* source_gpr_usages) +{ + // X0-X7 are used for passing arguments. + // X18-X31 are either callee saved or used for special purposes. + constexpr size_t temp_reg_begin = 8; + constexpr size_t temp_reg_end = 18; + + while (begin != end) + { + bool removed_moves_during_this_loop_iteration = false; + + RegisterMove* move = end; + while (move != begin) + { + RegisterMove* prev_move = move; + --move; + if ((*source_gpr_usages)[DecodeReg(move->dst)] == 0) + { + MOV(move->dst, move->src); + (*source_gpr_usages)[DecodeReg(move->src)]--; + std::move(prev_move, end, move); + --end; + removed_moves_during_this_loop_iteration = true; + } + } + + if (!removed_moves_during_this_loop_iteration) + { + // We need to break a cycle using a temporary register. + + size_t temp_reg = temp_reg_begin; + while ((*source_gpr_usages)[temp_reg] != 0) + { + ++temp_reg; + ASSERT_MSG(COMMON, temp_reg != temp_reg_end, "Out of registers"); + } + + const ARM64Reg src = begin->src; + const ARM64Reg dst = + (Is64Bit(src) ? EncodeRegTo64 : EncodeRegTo32)(static_cast(temp_reg)); + + MOV(dst, src); + (*source_gpr_usages)[DecodeReg(dst)] = (*source_gpr_usages)[DecodeReg(src)]; + (*source_gpr_usages)[DecodeReg(src)] = 0; + + std::for_each(begin, end, [src, dst](RegisterMove& move) { + if (move.src == src) + move.src = dst; + }); + } + } +} + template void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm) { diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index 00691dc647..e8bee81f48 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -3,10 +3,12 @@ #pragma once +#include #include #include #include #include +#include #include #include "Common/ArmCommon.h" @@ -17,6 +19,7 @@ #include "Common/Common.h" #include "Common/CommonTypes.h" #include "Common/MathUtil.h" +#include "Common/SmallVector.h" namespace Arm64Gen { @@ -599,6 +602,12 @@ class ARM64XEmitter friend class ARM64FloatEmitter; private: + struct RegisterMove + { + ARM64Reg dst; + ARM64Reg src; + }; + // Pointer to memory where code will be emitted to. u8* m_code = nullptr; @@ -646,6 +655,10 @@ private: [[nodiscard]] FixupBranch WriteFixupBranch(); + // This function solves the "parallel moves" problem common in compilers. + // The arguments are mutated! + void ParallelMoves(RegisterMove* begin, RegisterMove* end, std::array* source_gpr_usages); + template void MOVI2RImpl(ARM64Reg Rd, T imm); @@ -1058,6 +1071,114 @@ public: void ABI_PushRegisters(BitSet32 registers); void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0)); + // Plain function call + void QuickCallFunction(ARM64Reg scratchreg, const void* func); + template + void QuickCallFunction(ARM64Reg scratchreg, T func) + { + QuickCallFunction(scratchreg, (const void*)func); + } + + template + void ABI_CallFunction(FuncRet (*func)(FuncArgs...), Args... args) + { + static_assert(sizeof...(FuncArgs) == sizeof...(Args), "Wrong number of arguments"); + static_assert(sizeof...(FuncArgs) <= 8, "Passing arguments on the stack is not supported"); + + if constexpr (!std::is_void_v) + static_assert(sizeof(FuncRet) <= 16, "Large return types are not supported"); + + std::array source_gpr_uses{}; + + auto check_argument = [&](auto& arg) { + using Arg = std::decay_t; + + if constexpr (std::is_same_v) + { + ASSERT(IsGPR(arg)); + source_gpr_uses[DecodeReg(arg)]++; + } + else + { + // To be more correct, we should be checking FuncArgs here rather than Args, but that's a + // lot more effort to implement. Let's just do these best-effort checks for now. + static_assert(!std::is_floating_point_v, "Floating-point arguments are not supported"); + static_assert(sizeof(Arg) <= 8, "Arguments bigger than a register are not supported"); + } + }; + + (check_argument(args), ...); + + { + Common::SmallVector pending_moves; + + size_t i = 0; + + auto handle_register_argument = [&](auto& arg) { + using Arg = std::decay_t; + + if constexpr (std::is_same_v) + { + const ARM64Reg dst_reg = + (Is64Bit(arg) ? EncodeRegTo64 : EncodeRegTo32)(static_cast(i)); + + if (dst_reg == arg) + { + // The value is already in the right register. + source_gpr_uses[DecodeReg(arg)]--; + } + else if (source_gpr_uses[i] == 0) + { + // The destination register isn't used as the source of another move. + // We can go ahead and do the move right away. + MOV(dst_reg, arg); + source_gpr_uses[DecodeReg(arg)]--; + } + else + { + // The destination register is used as the source of a move we haven't gotten to yet. + // Let's record that we need to deal with this move later. + pending_moves.emplace_back(dst_reg, arg); + } + } + + ++i; + }; + + (handle_register_argument(args), ...); + + if (!pending_moves.empty()) + { + ParallelMoves(pending_moves.data(), pending_moves.data() + pending_moves.size(), + &source_gpr_uses); + } + } + + { + size_t i = 0; + + auto handle_immediate_argument = [&](auto& arg) { + using Arg = std::decay_t; + + if constexpr (!std::is_same_v) + { + const ARM64Reg dst_reg = + (sizeof(arg) == 8 ? EncodeRegTo64 : EncodeRegTo32)(static_cast(i)); + if constexpr (std::is_pointer_v) + MOVP2R(dst_reg, arg); + else + MOVI2R(dst_reg, arg); + } + + ++i; + }; + + (handle_immediate_argument(args), ...); + } + + QuickCallFunction(ARM64Reg::X8, func); + } + // Utility to generate a call to a std::function object. // // Unfortunately, calling operator() directly is undefined behavior in C++ @@ -1069,23 +1190,11 @@ public: return (*f)(args...); } - // This function expects you to have set up the state. - // Overwrites X0 and X8 - template - ARM64Reg ABI_SetupLambda(const std::function* f) + template + void ABI_CallLambdaFunction(const std::function* f, Args... args) { - auto trampoline = &ARM64XEmitter::CallLambdaTrampoline; - MOVP2R(ARM64Reg::X8, trampoline); - MOVP2R(ARM64Reg::X0, const_cast((const void*)f)); - return ARM64Reg::X8; - } - - // Plain function call - void QuickCallFunction(ARM64Reg scratchreg, const void* func); - template - void QuickCallFunction(ARM64Reg scratchreg, T func) - { - QuickCallFunction(scratchreg, (const void*)func); + auto trampoline = &ARM64XEmitter::CallLambdaTrampoline; + ABI_CallFunction(trampoline, f, args...); } }; diff --git a/Source/Core/Common/SmallVector.h b/Source/Core/Common/SmallVector.h index 09559ed21a..c7018f4741 100644 --- a/Source/Core/Common/SmallVector.h +++ b/Source/Core/Common/SmallVector.h @@ -29,9 +29,11 @@ public: T& operator[](size_t i) { return m_array[i]; } const T& operator[](size_t i) const { return m_array[i]; } + auto data() { return m_array.data(); } auto begin() { return m_array.begin(); } auto end() { return m_array.begin() + m_size; } + auto data() const { return m_array.data(); } auto begin() const { return m_array.begin(); } auto end() const { return m_array.begin() + m_size; } diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 961395e83c..9864fb0473 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -197,10 +197,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) } Interpreter::Instruction instr = Interpreter::GetInterpreterOp(inst); - MOVP2R(ARM64Reg::X8, instr); - MOVP2R(ARM64Reg::X0, &m_system.GetInterpreter()); - MOVI2R(ARM64Reg::W1, inst.hex); - BLR(ARM64Reg::X8); + ABI_CallFunction(instr, &m_system.GetInterpreter(), inst.hex); // If the instruction wrote to any registers which were marked as discarded, // we must mark them as no longer discarded @@ -248,10 +245,7 @@ void JitArm64::HLEFunction(u32 hook_index) gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - MOVP2R(ARM64Reg::X8, &HLE::ExecuteFromJIT); - MOVI2R(ARM64Reg::W0, js.compilerPC); - MOVI2R(ARM64Reg::W1, hook_index); - BLR(ARM64Reg::X8); + ABI_CallFunction(&HLE::ExecuteFromJIT, js.compilerPC, hook_index); } void JitArm64::DoNothing(UGeckoInstruction inst) @@ -275,21 +269,15 @@ void JitArm64::Cleanup() SUB(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1); CMP(ARM64Reg::X0, GPFifo::GATHER_PIPE_SIZE); FixupBranch exit = B(CC_LT); - MOVP2R(ARM64Reg::X1, &GPFifo::UpdateGatherPipe); - MOVP2R(ARM64Reg::X0, &m_system.GetGPFifo()); - BLR(ARM64Reg::X1); + ABI_CallFunction(&GPFifo::UpdateGatherPipe, &m_system.GetGPFifo()); SetJumpTarget(exit); } // SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex) { - MOVP2R(ARM64Reg::X8, &PowerPC::UpdatePerformanceMonitor); - MOVI2R(ARM64Reg::X0, js.downcountAmount); - MOVI2R(ARM64Reg::X1, js.numLoadStoreInst); - MOVI2R(ARM64Reg::X2, js.numFloatingPointInst); - MOVP2R(ARM64Reg::X3, &m_ppc_state); - BLR(ARM64Reg::X8); + ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst, + js.numFloatingPointInst, &m_ppc_state); } } @@ -331,10 +319,8 @@ void JitArm64::IntializeSpeculativeConstants() fail = GetCodePtr(); MOVI2R(DISPATCHER_PC, js.blockStart); STR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); - MOVP2R(ARM64Reg::X8, &JitInterface::CompileExceptionCheckFromJIT); - MOVP2R(ARM64Reg::X0, &m_system.GetJitInterface()); - MOVI2R(ARM64Reg::W1, static_cast(JitInterface::ExceptionType::SpeculativeConstants)); - BLR(ARM64Reg::X8); + ABI_CallFunction(&JitInterface::CompileExceptionCheckFromJIT, &m_system.GetJitInterface(), + static_cast(JitInterface::ExceptionType::SpeculativeConstants)); B(dispatcher_no_check); SwitchToNearCode(); } @@ -652,12 +638,10 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc)); STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); - MOVP2R(ARM64Reg::X0, &m_system.GetPowerPC()); - if (only_external) - MOVP2R(EncodeRegTo64(DISPATCHER_PC), &PowerPC::CheckExternalExceptionsFromJIT); - else - MOVP2R(EncodeRegTo64(DISPATCHER_PC), &PowerPC::CheckExceptionsFromJIT); - BLR(EncodeRegTo64(DISPATCHER_PC)); + const auto f = + only_external ? &PowerPC::CheckExternalExceptionsFromJIT : &PowerPC::CheckExceptionsFromJIT; + ABI_CallFunction(f, &m_system.GetPowerPC()); + EmitUpdateMembase(); LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc)); @@ -998,10 +982,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) SetJumpTarget(fail); MOVI2R(DISPATCHER_PC, js.blockStart); STR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); - MOVP2R(ARM64Reg::X0, &m_system.GetJitInterface()); - MOVI2R(ARM64Reg::W1, static_cast(JitInterface::ExceptionType::PairedQuantize)); - MOVP2R(ARM64Reg::X2, &JitInterface::CompileExceptionCheckFromJIT); - BLR(ARM64Reg::X2); + ABI_CallFunction(&JitInterface::CompileExceptionCheckFromJIT, &m_system.GetJitInterface(), + static_cast(JitInterface::ExceptionType::PairedQuantize)); B(dispatcher_no_check); SwitchToNearCode(); SetJumpTarget(no_fail); @@ -1064,9 +1046,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) ABI_PushRegisters(regs_in_use); m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30); - MOVP2R(ARM64Reg::X8, &GPFifo::FastCheckGatherPipe); - MOVP2R(ARM64Reg::X0, &m_system.GetGPFifo()); - BLR(ARM64Reg::X8); + ABI_CallFunction(&GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo()); m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30); ABI_PopRegisters(regs_in_use); @@ -1182,9 +1162,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) MOVI2R(DISPATCHER_PC, op.address); STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); - MOVP2R(ARM64Reg::X0, &m_system.GetPowerPC()); - MOVP2R(ARM64Reg::X1, &PowerPC::CheckBreakPointsFromJIT); - BLR(ARM64Reg::X1); + ABI_CallFunction(&PowerPC::CheckBreakPointsFromJIT, &m_system.GetPowerPC()); LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr())); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 8e14e694c4..e0a4845442 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -211,55 +211,45 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, src_reg = dst_reg; } - if (dst_reg != src_reg) - MOV(dst_reg, src_reg); - const bool reverse = (flags & BackPatchInfo::FLAG_REVERSE) != 0; - MOVP2R(ARM64Reg::X2, &m_mmu); - if (access_size == 64) { - MOVP2R(ARM64Reg::X8, - reverse ? &PowerPC::WriteU64SwapFromJitArm64 : &PowerPC::WriteU64FromJitArm64); + ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJitArm64 : + &PowerPC::WriteU64FromJitArm64, + src_reg, ARM64Reg::W1, &m_mmu); } else if (access_size == 32) { - MOVP2R(ARM64Reg::X8, - reverse ? &PowerPC::WriteU32SwapFromJitArm64 : &PowerPC::WriteU32FromJitArm64); + ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJitArm64 : + &PowerPC::WriteU32FromJitArm64, + src_reg, ARM64Reg::W1, &m_mmu); } else if (access_size == 16) { - MOVP2R(ARM64Reg::X8, - reverse ? &PowerPC::WriteU16SwapFromJitArm64 : &PowerPC::WriteU16FromJitArm64); + ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJitArm64 : + &PowerPC::WriteU16FromJitArm64, + src_reg, ARM64Reg::W1, &m_mmu); } else { - MOVP2R(ARM64Reg::X8, &PowerPC::WriteU8FromJitArm64); + ABI_CallFunction(&PowerPC::WriteU8FromJitArm64, src_reg, ARM64Reg::W1, &m_mmu); } - - BLR(ARM64Reg::X8); } else if (flags & BackPatchInfo::FLAG_ZERO_256) { - MOVP2R(ARM64Reg::X1, &m_mmu); - MOVP2R(ARM64Reg::X8, &PowerPC::ClearDCacheLineFromJitArm64); - BLR(ARM64Reg::X8); + ABI_CallFunction(&PowerPC::ClearDCacheLineFromJitArm64, ARM64Reg::W0, &m_mmu); } else { - MOVP2R(ARM64Reg::X1, &m_mmu); - if (access_size == 64) - MOVP2R(ARM64Reg::X8, &PowerPC::ReadU64FromJitArm64); + ABI_CallFunction(&PowerPC::ReadU64FromJitArm64, ARM64Reg::W0, &m_mmu); else if (access_size == 32) - MOVP2R(ARM64Reg::X8, &PowerPC::ReadU32FromJitArm64); + ABI_CallFunction(&PowerPC::ReadU32FromJitArm64, ARM64Reg::W0, &m_mmu); else if (access_size == 16) - MOVP2R(ARM64Reg::X8, &PowerPC::ReadU16FromJitArm64); + ABI_CallFunction(&PowerPC::ReadU16FromJitArm64, ARM64Reg::W0, &m_mmu); else - MOVP2R(ARM64Reg::X8, &PowerPC::ReadU8FromJitArm64); - - BLR(ARM64Reg::X8); + ABI_CallFunction(&PowerPC::ReadU8FromJitArm64, ARM64Reg::W0, &m_mmu); } m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index e353e87731..93ab5a9511 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -770,13 +770,17 @@ void JitArm64::dcbx(UGeckoInstruction inst) ABI_PushRegisters(gprs_to_push); m_float_emit.ABI_PushRegisters(fprs_to_push, WA); - MOVP2R(ARM64Reg::X0, &m_system.GetJitInterface()); - // effective_address and loop_counter are already in W1 and W2 respectively + // For efficiency, effective_addr and loop_counter are already in W1 and W2 respectively if (make_loop) - MOVP2R(ARM64Reg::X8, &JitInterface::InvalidateICacheLinesFromJIT); + { + ABI_CallFunction(&JitInterface::InvalidateICacheLinesFromJIT, &m_system.GetJitInterface(), + effective_addr, loop_counter); + } else - MOVP2R(ARM64Reg::X8, &JitInterface::InvalidateICacheLineFromJIT); - BLR(ARM64Reg::X8); + { + ABI_CallFunction(&JitInterface::InvalidateICacheLineFromJIT, &m_system.GetJitInterface(), + effective_addr); + } m_float_emit.ABI_PopRegisters(fprs_to_push, WA); ABI_PopRegisters(gprs_to_push); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 3df2fea5fa..a7cd925988 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -78,9 +78,7 @@ void JitArm64::UpdateRoundingMode() ABI_PushRegisters(gprs_to_save); m_float_emit.ABI_PushRegisters(fprs_to_save, ARM64Reg::X8); - MOVP2R(ARM64Reg::X0, &m_ppc_state); - MOVP2R(ARM64Reg::X8, &PowerPC::RoundingModeUpdated); - BLR(ARM64Reg::X8); + ABI_CallFunction(&PowerPC::RoundingModeUpdated, &m_ppc_state); m_float_emit.ABI_PopRegisters(fprs_to_save, ARM64Reg::X8); ABI_PopRegisters(gprs_to_save); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index e6296f4d31..8df7713466 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -169,10 +169,7 @@ void JitArm64::GenerateAsm() // Call JIT ResetStack(); - MOVP2R(ARM64Reg::X0, this); - MOV(ARM64Reg::W1, DISPATCHER_PC); - MOVP2R(ARM64Reg::X8, reinterpret_cast(&JitTrampoline)); - BLR(ARM64Reg::X8); + ABI_CallFunction(&JitTrampoline, this, DISPATCHER_PC); LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); B(dispatcher_no_check); @@ -189,8 +186,7 @@ void JitArm64::GenerateAsm() FixupBranch exit = CBNZ(ARM64Reg::W0); SetJumpTarget(to_start_of_timing_slice); - MOVP2R(ARM64Reg::X8, &CoreTiming::GlobalAdvance); - BLR(ARM64Reg::X8); + ABI_CallFunction(&CoreTiming::GlobalAdvance); // When we've just entered the jit we need to update the membase // GlobalAdvance also checks exceptions after which we need to diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp index 0a82eb0bb8..4beb74ff1b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp @@ -78,10 +78,8 @@ private: m_emit->ABI_PushRegisters(m_gprs_in_use); float_emit.ABI_PushRegisters(m_fprs_in_use, ARM64Reg::X1); - m_emit->MOVP2R(ARM64Reg::X1, m_system); - m_emit->MOVI2R(ARM64Reg::W2, m_address); - m_emit->MOV(ARM64Reg::W3, m_src_reg); - m_emit->BLR(m_emit->ABI_SetupLambda(lambda)); + + m_emit->ABI_CallLambdaFunction(lambda, m_system, m_address, m_src_reg); float_emit.ABI_PopRegisters(m_fprs_in_use, ARM64Reg::X1); m_emit->ABI_PopRegisters(m_gprs_in_use); @@ -176,9 +174,9 @@ private: m_emit->ABI_PushRegisters(m_gprs_in_use); float_emit.ABI_PushRegisters(m_fprs_in_use, ARM64Reg::X1); - m_emit->MOVP2R(ARM64Reg::X1, m_system); - m_emit->MOVI2R(ARM64Reg::W2, m_address); - m_emit->BLR(m_emit->ABI_SetupLambda(lambda)); + + m_emit->ABI_CallLambdaFunction(lambda, m_system, m_address); + if (m_sign_extend) m_emit->SBFM(m_dst_reg, ARM64Reg::W0, 0, sbits - 1); else diff --git a/Source/UnitTests/Common/Arm64EmitterTest.cpp b/Source/UnitTests/Common/Arm64EmitterTest.cpp new file mode 100644 index 0000000000..61be07fef6 --- /dev/null +++ b/Source/UnitTests/Common/Arm64EmitterTest.cpp @@ -0,0 +1,178 @@ +// Copyright 2023 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "Common/Arm64Emitter.h" +#include "Common/BitSet.h" +#include "Common/BitUtils.h" + +#include + +using namespace Arm64Gen; + +namespace +{ +u32 ZeroParameterFunction() +{ + return 123; +} + +u32 OneParameterFunction(u64 a) +{ + return a + 23; +} + +u32 TwoParameterFunction(u64 a, u64 b) +{ + return a * 10 + b + 3; +} + +u32 ThreeParameterFunction(u64 a, u64 b, u64 c) +{ + return a * 10 + b + c / 10; +} + +u32 EightParameterFunction(u64 a, u64 b, u64 c, u64 d, u64 e, u64 f, u64 g, u64 h) +{ + return a / 20 + b / 8 + c / 10 + d / 2 + e / 5 - f + g + h / 3; +} + +class TestCallFunction : public ARM64CodeBlock +{ +public: + TestCallFunction() { AllocCodeSpace(4096); } + + template + void Emit(F f) + { + ResetCodePtr(); + + m_code_pointer = GetCodePtr(); + { + const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes; + + constexpr BitSet32 link_register{DecodeReg(ARM64Reg::X30)}; + ABI_PushRegisters(link_register); + f(); + ABI_PopRegisters(link_register); + RET(); + } + + FlushIcacheSection(const_cast(m_code_pointer), const_cast(GetCodePtr())); + } + + void Run() + { + const u64 actual = Common::BitCast(m_code_pointer)(); + constexpr u64 expected = 123; + EXPECT_EQ(expected, actual); + } + +private: + const u8* m_code_pointer = nullptr; +}; + +} // namespace + +TEST(Arm64Emitter, CallFunction_ZeroParameters) +{ + TestCallFunction test; + test.Emit([&] { test.ABI_CallFunction(&ZeroParameterFunction); }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_OneConstantParameter) +{ + TestCallFunction test; + test.Emit([&] { test.ABI_CallFunction(&OneParameterFunction, 100); }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_OneRegisterParameterNoMov) +{ + TestCallFunction test; + test.Emit([&] { + test.MOVI2R(ARM64Reg::X0, 100); + test.ABI_CallFunction(&OneParameterFunction, ARM64Reg::X0); + }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_OneRegisterParameterMov) +{ + TestCallFunction test; + test.Emit([&] { + test.MOVI2R(ARM64Reg::X8, 100); + test.ABI_CallFunction(&OneParameterFunction, ARM64Reg::X8); + }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_TwoRegistersMixed) +{ + TestCallFunction test; + test.Emit([&] { + test.MOVI2R(ARM64Reg::X0, 20); + test.ABI_CallFunction(&TwoParameterFunction, 10, ARM64Reg::X0); + }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_TwoRegistersCycle) +{ + TestCallFunction test; + test.Emit([&] { + test.MOVI2R(ARM64Reg::X0, 20); + test.MOVI2R(ARM64Reg::X1, 10); + test.ABI_CallFunction(&TwoParameterFunction, ARM64Reg::X1, ARM64Reg::X0); + }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_ThreeRegistersMixed) +{ + TestCallFunction test; + test.Emit([&] { + test.MOVI2R(ARM64Reg::X1, 10); + test.MOVI2R(ARM64Reg::X2, 20); + test.ABI_CallFunction(&ThreeParameterFunction, ARM64Reg::X1, ARM64Reg::X2, 30); + }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_ThreeRegistersCycle1) +{ + TestCallFunction test; + test.Emit([&] { + test.MOVI2R(ARM64Reg::X0, 30); + test.MOVI2R(ARM64Reg::X1, 10); + test.MOVI2R(ARM64Reg::X2, 20); + test.ABI_CallFunction(&ThreeParameterFunction, ARM64Reg::X1, ARM64Reg::X2, ARM64Reg::X0); + }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_ThreeRegistersCycle2) +{ + TestCallFunction test; + test.Emit([&] { + test.MOVI2R(ARM64Reg::X0, 20); + test.MOVI2R(ARM64Reg::X1, 30); + test.MOVI2R(ARM64Reg::X2, 10); + test.ABI_CallFunction(&ThreeParameterFunction, ARM64Reg::X2, ARM64Reg::X0, ARM64Reg::X1); + }); + test.Run(); +} + +TEST(Arm64Emitter, CallFunction_EightRegistersMixed) +{ + TestCallFunction test; + test.Emit([&] { + test.MOVI2R(ARM64Reg::X3, 12); + test.MOVI2R(ARM64Reg::X4, 23); + test.MOVI2R(ARM64Reg::X5, 24); + test.MOVI2R(ARM64Reg::X30, 2000); + test.ABI_CallFunction(&EightParameterFunction, ARM64Reg::X30, 40, ARM64Reg::X4, ARM64Reg::X5, + ARM64Reg::X4, ARM64Reg::X3, 5, ARM64Reg::X4); + }); + test.Run(); +} diff --git a/Source/UnitTests/Common/CMakeLists.txt b/Source/UnitTests/Common/CMakeLists.txt index bbea892fdf..a4c9a67ade 100644 --- a/Source/UnitTests/Common/CMakeLists.txt +++ b/Source/UnitTests/Common/CMakeLists.txt @@ -21,4 +21,6 @@ add_dolphin_test(SwapTest SwapTest.cpp) if (_M_X86) add_dolphin_test(x64EmitterTest x64EmitterTest.cpp) target_link_libraries(x64EmitterTest PRIVATE bdisasm) +elseif (_M_ARM_64) + add_dolphin_test(Arm64EmitterTest Arm64EmitterTest.cpp) endif() diff --git a/Source/UnitTests/UnitTests.vcxproj b/Source/UnitTests/UnitTests.vcxproj index d48b0cc4ff..b80b93a8b8 100644 --- a/Source/UnitTests/UnitTests.vcxproj +++ b/Source/UnitTests/UnitTests.vcxproj @@ -79,6 +79,7 @@ +