diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index d49c799744..b2f546d448 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <array>
 #include <cinttypes>
 #include <cstring>
 
@@ -200,10 +201,10 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
   // To repeat a value every d bits, we multiply it by a number of the form
   // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
   // be derived using a table lookup on CLZ(d).
-  static const std::array<uint64_t, 6> multipliers = {
-      0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL,
-      0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
-  };
+  static const std::array<uint64_t, 6> multipliers = {{
+      0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL,
+      0x1111111111111111UL, 0x5555555555555555UL,
+  }};
 
   int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
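A quick aside on the multiplier table touched above (illustration only, not part of the patch): for a d-bit element, `CLZ(d) - 57` selects the constant whose product replicates the element across the full 64-bit register. A minimal standalone check, assuming GCC/Clang's `__builtin_clzll` for the CLZ step:

```cpp
// Illustration only: the replication trick behind IsImmLogical's table.
// CLZ(d) - 57 maps d = 64, 32, 16, 8, 4, 2 to indices 0..5.
#include <array>
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main()
{
  static const std::array<uint64_t, 6> multipliers = {{
      0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL,
      0x1111111111111111UL, 0x5555555555555555UL,
  }};

  const uint64_t d = 8;           // element width in bits
  const uint64_t element = 0x55;  // d-bit pattern to replicate
  const int idx = __builtin_clzll(d) - 57;  // CLZ(8) = 60, so index 3

  // 0x55 * 0x0101010101010101 == 0x5555555555555555
  std::printf("%016" PRIx64 "\n", element * multipliers[idx]);
}
```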
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index d36a343257..3564c91c01 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -120,6 +120,26 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
       SetJumpTarget(c);
     }
   }
+
+  if (jo.memcheck && (js.op->opinfo->flags & FL_LOADSTORE))
+  {
+    ARM64Reg WA = gpr.GetReg();
+    LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
+    FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));
+
+    FixupBranch handleException = B();
+    SwitchToFarCode();
+    SetJumpTarget(handleException);
+
+    gpr.Flush(FLUSH_MAINTAIN_STATE);
+    fpr.Flush(FLUSH_MAINTAIN_STATE);
+
+    WriteExceptionExit(js.compilerPC);
+
+    SwitchToNearCode();
+    SetJumpTarget(noException);
+    gpr.Unlock(WA);
+  }
 }
 
 void JitArm64::HLEFunction(UGeckoInstruction inst)
@@ -598,26 +618,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
       // If we have a register that will never be used again, flush it.
       gpr.StoreRegisters(~ops[i].gprInUse);
       fpr.StoreRegisters(~ops[i].fprInUse);
-
-      if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
-      {
-        ARM64Reg WA = gpr.GetReg();
-        LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
-        FixupBranch noException = TBZ(WA, IntLog2(EXCEPTION_DSI));
-
-        FixupBranch handleException = B();
-        SwitchToFarCode();
-        SetJumpTarget(handleException);
-
-        gpr.Flush(FLUSH_MAINTAIN_STATE);
-        fpr.Flush(FLUSH_MAINTAIN_STATE);
-
-        WriteExceptionExit(js.compilerPC);
-
-        SwitchToNearCode();
-        SetJumpTarget(noException);
-        gpr.Unlock(WA);
-      }
     }
 
     i += js.skipInstructions;
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index c2832ff460..66f9b07337 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -238,9 +238,6 @@ private:
   void ComputeCarry(bool Carry);
   void ComputeCarry();
 
-  typedef u32 (*Operation)(u32, u32);
-  void reg_imm(u32 d, u32 a, u32 value, Operation do_op,
-               void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg,
-                                         ArithOption),
-               bool Rc = false);
+  void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
+               void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc = false);
 };
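The JitArm64_Integer.cpp hunks that follow put this new signature to work: the plain function pointer `do_op` survives purely for the constant-folding path (when `gpr.IsImm(a)` is true, the operation is evaluated at compile time and no code is emitted), while the member-function pointer now names an immediate-capable `*I2R` emitter taking (dest, source, 64-bit immediate, scratch register) instead of a shifted-register form. A minimal runnable model of that dispatch, where `MockJit` and its fields are invented stand-ins, not Dolphin APIs:

```cpp
// Minimal model of reg_imm's two paths (illustration only).
#include <cstdint>
#include <cstdio>

using u32 = uint32_t;

struct MockJit
{
  bool known_const;  // stands in for gpr.IsImm(a)
  u32 reg_value;     // constant the PPC register is known to hold

  void reg_imm(u32 value, u32 (*do_op)(u32, u32))
  {
    if (known_const)
    {
      // Fold at compile time: no AArch64 code is emitted at all.
      std::printf("folded: 0x%08x\n", do_op(reg_value, value));
    }
    else
    {
      // Otherwise emit one immediate-form instruction; ORRI2R and friends
      // fall back to materializing the constant in the scratch register
      // when it is not encodable as an ARM64 logical immediate.
      std::printf("emit opI2R with #0x%08x\n", value);
    }
  }
};

int main()
{
  MockJit folded{true, 0xF0};
  folded.reg_imm(0x0F, [](u32 a, u32 b) { return a | b; });  // ori-style

  MockJit emitted{false, 0};
  emitted.reg_imm(0xFF00, [](u32 a, u32 b) { return a & b; });  // andi-style
}
```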
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
index 0144ef3832..a72d2a26a7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
@@ -75,25 +75,8 @@ void JitArm64::ComputeCarry()
   gpr.Unlock(WA);
 }
 
-// Following static functions are used in conjunction with reg_imm
-static u32 Or(u32 a, u32 b)
-{
-  return a | b;
-}
-
-static u32 And(u32 a, u32 b)
-{
-  return a & b;
-}
-
-static u32 Xor(u32 a, u32 b)
-{
-  return a ^ b;
-}
-
-void JitArm64::reg_imm(u32 d, u32 a, u32 value, Operation do_op,
-                       void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, ARM64Reg, ArithOption),
-                       bool Rc)
+void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
+                       void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc)
 {
   if (gpr.IsImm(a))
   {
@@ -105,8 +88,7 @@ void JitArm64::reg_imm(u32 d, u32 a, u32 value, Operation do_op,
   {
     gpr.BindToRegister(d, d == a);
     ARM64Reg WA = gpr.GetReg();
-    MOVI2R(WA, value);
-    (this->*op)(gpr.R(d), gpr.R(a), WA, ArithOption(WA, ST_LSL, 0));
+    (this->*op)(gpr.R(d), gpr.R(a), value, WA);
     gpr.Unlock(WA);
 
     if (Rc)
@@ -128,22 +110,23 @@ void JitArm64::arith_imm(UGeckoInstruction inst)
       // NOP
       return;
     }
-    reg_imm(a, s, inst.UIMM, Or, &ARM64XEmitter::ORR);
+    reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a | b; }, &ARM64XEmitter::ORRI2R);
     break;
   case 25:  // oris
-    reg_imm(a, s, inst.UIMM << 16, Or, &ARM64XEmitter::ORR);
+    reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a | b; }, &ARM64XEmitter::ORRI2R);
     break;
   case 28:  // andi
-    reg_imm(a, s, inst.UIMM, And, &ARM64XEmitter::AND, true);
+    reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a & b; }, &ARM64XEmitter::ANDI2R, true);
     break;
   case 29:  // andis
-    reg_imm(a, s, inst.UIMM << 16, And, &ARM64XEmitter::AND, true);
+    reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a & b; }, &ARM64XEmitter::ANDI2R,
+            true);
     break;
   case 26:  // xori
-    reg_imm(a, s, inst.UIMM, Xor, &ARM64XEmitter::EOR);
+    reg_imm(a, s, inst.UIMM, [](u32 a, u32 b) { return a ^ b; }, &ARM64XEmitter::EORI2R);
     break;
   case 27:  // xoris
-    reg_imm(a, s, inst.UIMM << 16, Xor, &ARM64XEmitter::EOR);
+    reg_imm(a, s, inst.UIMM << 16, [](u32 a, u32 b) { return a ^ b; }, &ARM64XEmitter::EORI2R);
     break;
   }
 }
@@ -272,37 +255,37 @@ void JitArm64::boolX(UGeckoInstruction inst)
     gpr.BindToRegister(a, (a == s) || (a == b));
     if (inst.SUBOP10 == 28)  // andx
     {
-      AND(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      AND(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 476)  // nandx
     {
-      AND(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      AND(gpr.R(a), gpr.R(s), gpr.R(b));
       MVN(gpr.R(a), gpr.R(a));
     }
     else if (inst.SUBOP10 == 60)  // andcx
     {
-      BIC(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      BIC(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 444)  // orx
     {
-      ORR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      ORR(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 124)  // norx
     {
-      ORR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      ORR(gpr.R(a), gpr.R(s), gpr.R(b));
       MVN(gpr.R(a), gpr.R(a));
     }
     else if (inst.SUBOP10 == 412)  // orcx
     {
-      ORN(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      ORN(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 316)  // xorx
     {
-      EOR(gpr.R(a), gpr.R(s), gpr.R(b), ArithOption(gpr.R(a), ST_LSL, 0));
+      EOR(gpr.R(a), gpr.R(s), gpr.R(b));
     }
     else if (inst.SUBOP10 == 284)  // eqvx
     {
-      EON(gpr.R(a), gpr.R(b), gpr.R(s), ArithOption(gpr.R(a), ST_LSL, 0));
+      EON(gpr.R(a), gpr.R(b), gpr.R(s));
     }
     else
     {
@@ -418,7 +401,7 @@ void JitArm64::negx(UGeckoInstruction inst)
   else
   {
     gpr.BindToRegister(d, d == a);
-    SUB(gpr.R(d), WSP, gpr.R(a), ArithOption(gpr.R(a), ST_LSL, 0));
+    SUB(gpr.R(d), WSP, gpr.R(a));
     if (inst.Rc)
       ComputeRC(gpr.R(d), 0);
   }
@@ -692,8 +675,11 @@ void JitArm64::addic(UGeckoInstruction inst)
   else
   {
     ARM64Reg WA = gpr.GetReg();
-    MOVI2R(WA, imm);
-    ADDS(gpr.R(d), gpr.R(a), WA);
+    MOVI2R(WA, std::abs(simm));
+    if (simm < 0)
+      SUBS(gpr.R(d), gpr.R(a), WA);
+    else
+      ADDS(gpr.R(d), gpr.R(a), WA);
     gpr.Unlock(WA);
   }
 
diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp
index 87ac4c6722..a1c8783581 100644
--- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp
+++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp
@@ -9,19 +9,18 @@
 
 using namespace Arm64Gen;
 
-ARM64Reg src_reg = X0;
-ARM64Reg dst_reg = X1;
-ARM64Reg count_reg = W2;
-ARM64Reg skipped_reg = W17;
-ARM64Reg scratch1_reg = W16;
-ARM64Reg scratch2_reg = W15;
-ARM64Reg scratch3_reg = W14;
-ARM64Reg scratch4_reg = W13;
-ARM64Reg saved_count = W12;
+constexpr ARM64Reg src_reg = X0;
+constexpr ARM64Reg dst_reg = X1;
+constexpr ARM64Reg count_reg = W2;
+constexpr ARM64Reg skipped_reg = W17;
+constexpr ARM64Reg scratch1_reg = W16;
+constexpr ARM64Reg scratch2_reg = W15;
+constexpr ARM64Reg scratch3_reg = W14;
+constexpr ARM64Reg saved_count = W12;
 
-ARM64Reg stride_reg = X11;
-ARM64Reg arraybase_reg = X10;
-ARM64Reg scale_reg = X9;
+constexpr ARM64Reg stride_reg = X11;
+constexpr ARM64Reg arraybase_reg = X10;
+constexpr ARM64Reg scale_reg = X9;
 
 alignas(16) static const float scale_factors[] = {
     1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3),
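For context on the scale_factors table in that last context line (the table itself is untouched; only the register constants above it changed): each entry has the form 1 / 2^n, and the assumed usage is that the vertex loader indexes it by a format's fractional-bit count so fixed-point components dequantize with a single multiply. A standalone sketch under that assumption:

```cpp
// Sketch of fixed-point dequantization via a 1 / 2^frac scale factor
// (assumption: the table is indexed by the vertex format's fractional
// bit count; 'frac' and 'raw' here are made-up sample values).
#include <cstdio>

int main()
{
  const int frac = 6;                         // fractional bits in the format
  const float scale = 1.0f / (1ULL << frac);  // equivalent of scale_factors[6]

  const signed short raw = 96;                // raw fixed-point component
  std::printf("%f\n", raw * scale);           // 96 / 64 = 1.5
}
```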