diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index 4cb4086d7c..355275caf8 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -2874,7 +2874,7 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 EmitVectorxElement(0, 2 | (size >> 6), L, 0b1001, H, Rd, Rn, Rm); } -void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers) +void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp) { bool bundled_loadstore = false; int num_regs = registers.Count(); @@ -2904,15 +2904,10 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers) } } - if (!bundled_loadstore) + if (bundled_loadstore && tmp != INVALID_REG) { - for (auto it : registers) - STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16); - } - else - { - // Violating the AAPCS64 never felt so right. m_emit->SUB(SP, SP, num_regs * 16); + m_emit->ADD(tmp, SP, 0); for (int i = 0; i < 32; ++i) { if (!registers[i]) @@ -2927,14 +2922,18 @@ void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers) // 4 < 4 && registers[i + 4] false! while (++count < 4 && (i + count) < 32 && registers[i + count]) {} - ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), SP); + ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp); i += count - 1; } - m_emit->SUB(SP, SP, num_regs * 16); + } + else + { + for (auto it : registers) + STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16); } } -void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers) +void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp) { bool bundled_loadstore = false; int num_regs = registers.Count(); @@ -2964,18 +2963,9 @@ void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers) } } - if (!bundled_loadstore) - { - for (int i = 31; i >= 0; --i) - { - if (!registers[i]) - continue; - - LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16); - } - } - else + if (bundled_loadstore && tmp != INVALID_REG) { + // The temporary register is only used to indicate that we can use this code path for (int i = 0; i < 32; ++i) { if (!registers[i]) @@ -2989,6 +2979,16 @@ void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers) i += count - 1; } } + else + { + for (int i = 31; i >= 0; --i) + { + if (!registers[i]) + continue; + + LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16); + } + } } } diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index 3fd3d390c7..ab933225c5 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -755,8 +755,8 @@ public: void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index); // ABI related - void ABI_PushRegisters(BitSet32 registers); - void ABI_PopRegisters(BitSet32 registers); + void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG); + void ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG); private: ARM64XEmitter* m_emit; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 62cce8374b..e82367873b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -279,12 +279,12 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.UMOV(64, X0, Q0, 0); ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32)); MOVI2R(X30, (u64)PowerPC::Write_U64); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -310,12 +310,12 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.UMOV(16, W0, Q0, 0); REV16(W0, W0); MOVI2R(X30, (u64)PowerPC::Write_U16); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -341,12 +341,12 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.UMOV(16, W0, Q0, 0); REV16(W0, W0); MOVI2R(X30, (u64)PowerPC::Write_U16); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -372,12 +372,12 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.REV32(8, D0, D0); float_emit.UMOV(32, W0, Q0, 0); MOVI2R(X30, (u64)PowerPC::Write_U32); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -402,12 +402,12 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.REV32(8, D0, D0); float_emit.UMOV(32, W0, Q0, 0); MOVI2R(X30, (u64)PowerPC::Write_U32); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -428,11 +428,11 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.UMOV(32, W0, Q0, 0); MOVI2R(X30, (u64)&PowerPC::Write_U32); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -457,11 +457,11 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.UMOV(32, W0, Q0, 0); MOVI2R(X30, (u64)&PowerPC::Write_U8); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -486,11 +486,11 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.SMOV(32, W0, Q0, 0); MOVI2R(X30, (u64)&PowerPC::Write_U8); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -515,11 +515,11 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.UMOV(32, W0, Q0, 0); MOVI2R(X30, (u64)&PowerPC::Write_U16); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); } @@ -544,11 +544,11 @@ void JitArm64AsmRoutineManager::GenerateCommon() SetJumpTarget(argh); ABI_PushRegisters(gprs); - float_emit.ABI_PushRegisters(fprs); + float_emit.ABI_PushRegisters(fprs, X3); float_emit.SMOV(32, W0, Q0, 0); MOVI2R(X30, (u64)&PowerPC::Write_U16); BLR(X30); - float_emit.ABI_PopRegisters(fprs); + float_emit.ABI_PopRegisters(fprs, X3); ABI_PopRegisters(gprs); RET(X30); }