JitArm64: Use LogicalImm in boolX

ARM64 has a special logical immediate encoding scheme that can be used
with AND, ORR, and EOR. By taking advantage of this, we no longer need
to materialize the immediate value in a register, saving instructions
and/or reducing register pressure.

- orx

Before:
mov    w23, #0x1
orr    w23, w25, w23

After:
orr    w23, w25, #0x1

- andx

Before:
mov    w26, #-0x80000000
and    w27, w27, w26
sxtw   x24, w27

After:
and    w27, w27, #0x80000000
sxtw   x26, w27

- eqvx

Before:
mov    w23, #0x4
eon    w26, w23, w22

After:
eor    w26, w22, #0xfffffffb

- xorx

Before:
mov    w23, #0x1e
eor    w23, w27, w23

After:
eor    w23, w27, #0x1e

- norx

Before:
mov    w25, #-0x2001
orr    w23, w23, w25
mvn    w23, w23

After:
orr    w23, w23, #0xffffdfff
mvn    w23, w23
This commit is contained in:
Bram Speeckaert 2023-07-21 14:50:13 +02:00
parent 2764978beb
commit a486168448

View File

@ -323,8 +323,10 @@ void JitArm64::boolX(UGeckoInstruction inst)
PanicAlertFmt("WTF!"); PanicAlertFmt("WTF!");
} }
} }
else if ((gpr.IsImm(s) && (gpr.GetImm(s) == 0 || gpr.GetImm(s) == 0xFFFFFFFF)) || else if ((gpr.IsImm(s) &&
(gpr.IsImm(b) && (gpr.GetImm(b) == 0 || gpr.GetImm(b) == 0xFFFFFFFF))) (gpr.GetImm(s) == 0 || gpr.GetImm(s) == 0xFFFFFFFF || LogicalImm(gpr.GetImm(s), 32))) ||
(gpr.IsImm(b) &&
(gpr.GetImm(b) == 0 || gpr.GetImm(b) == 0xFFFFFFFF || LogicalImm(gpr.GetImm(b), 32))))
{ {
int i, j; int i, j;
if (gpr.IsImm(s)) if (gpr.IsImm(s))
@ -337,7 +339,6 @@ void JitArm64::boolX(UGeckoInstruction inst)
i = b; i = b;
j = s; j = s;
} }
bool is_zero = gpr.GetImm(i) == 0;
bool complement_b = (inst.SUBOP10 == 60 /* andcx */) || (inst.SUBOP10 == 412 /* orcx */); bool complement_b = (inst.SUBOP10 == 60 /* andcx */) || (inst.SUBOP10 == 412 /* orcx */);
const bool final_not = (inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */); const bool final_not = (inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */);
@ -347,23 +348,39 @@ void JitArm64::boolX(UGeckoInstruction inst)
(inst.SUBOP10 == 124 /* norx */); (inst.SUBOP10 == 124 /* norx */);
const bool is_xor = (inst.SUBOP10 == 316 /* xorx */) || (inst.SUBOP10 == 284 /* eqvx */); const bool is_xor = (inst.SUBOP10 == 316 /* xorx */) || (inst.SUBOP10 == 284 /* eqvx */);
u32 imm = gpr.GetImm(i);
if ((complement_b && i == b) || (inst.SUBOP10 == 284 /* eqvx */)) if ((complement_b && i == b) || (inst.SUBOP10 == 284 /* eqvx */))
{ {
is_zero = !is_zero; imm = ~imm;
complement_b = false; complement_b = false;
} }
const bool is_zero = imm == 0;
const bool is_ones = imm == 0xFFFFFFFF;
// If imm can be represented as LogicalImm, so can ~imm.
const auto log_imm = LogicalImm(imm, 32);
if (is_xor) if (is_xor)
{ {
if (!is_zero) if (is_zero)
{
if (a != j)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), gpr.R(j));
}
}
else
{ {
gpr.BindToRegister(a, a == j); gpr.BindToRegister(a, a == j);
MVN(gpr.R(a), gpr.R(j)); if (is_ones)
} {
else if (a != j) MVN(gpr.R(a), gpr.R(j));
{ }
gpr.BindToRegister(a, false); else
MOV(gpr.R(a), gpr.R(j)); {
EOR(gpr.R(a), gpr.R(j), log_imm);
}
} }
if (inst.Rc) if (inst.Rc)
ComputeRC0(gpr.R(a)); ComputeRC0(gpr.R(a));
@ -376,16 +393,14 @@ void JitArm64::boolX(UGeckoInstruction inst)
if (inst.Rc) if (inst.Rc)
ComputeRC0(gpr.GetImm(a)); ComputeRC0(gpr.GetImm(a));
} }
else if (final_not || complement_b) else if (is_ones)
{ {
gpr.BindToRegister(a, a == j); if (final_not || complement_b)
MVN(gpr.R(a), gpr.R(j)); {
if (inst.Rc) gpr.BindToRegister(a, a == j);
ComputeRC0(gpr.R(a)); MVN(gpr.R(a), gpr.R(j));
} }
else else if (a != j)
{
if (a != j)
{ {
gpr.BindToRegister(a, false); gpr.BindToRegister(a, false);
MOV(gpr.R(a), gpr.R(j)); MOV(gpr.R(a), gpr.R(j));
@ -393,28 +408,66 @@ void JitArm64::boolX(UGeckoInstruction inst)
if (inst.Rc) if (inst.Rc)
ComputeRC0(gpr.R(a)); ComputeRC0(gpr.R(a));
} }
else
{
if (!complement_b)
{
gpr.BindToRegister(a, a == j);
AND(gpr.R(a), gpr.R(j), log_imm);
if (final_not)
MVN(gpr.R(a), gpr.R(a));
}
else
{
// No shorter instruction sequence is possible. Just materialize the
// immediate in a register as usual, so subsequent uses can leech off
// of it.
gpr.BindToRegister(a, (a == i) || (a == j));
BIC(gpr.R(a), gpr.R(i), gpr.R(j));
}
if (inst.Rc)
ComputeRC0(gpr.R(a));
}
} }
else if (is_or) else if (is_or)
{ {
if (!is_zero) if (is_ones)
{ {
gpr.SetImmediate(a, final_not ? 0 : 0xFFFFFFFF); gpr.SetImmediate(a, final_not ? 0 : 0xFFFFFFFF);
if (inst.Rc) if (inst.Rc)
ComputeRC0(gpr.GetImm(a)); ComputeRC0(gpr.GetImm(a));
} }
else if (final_not || complement_b) else if (is_zero)
{ {
gpr.BindToRegister(a, a == j); if (final_not || complement_b)
MVN(gpr.R(a), gpr.R(j)); {
gpr.BindToRegister(a, a == j);
MVN(gpr.R(a), gpr.R(j));
}
else if (a != j)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), gpr.R(j));
}
if (inst.Rc) if (inst.Rc)
ComputeRC0(gpr.R(a)); ComputeRC0(gpr.R(a));
} }
else else
{ {
if (a != j) if (!complement_b)
{ {
gpr.BindToRegister(a, false); gpr.BindToRegister(a, a == j);
MOV(gpr.R(a), gpr.R(j)); ORR(gpr.R(a), gpr.R(j), log_imm);
if (final_not)
MVN(gpr.R(a), gpr.R(a));
}
else
{
// No shorter instruction sequence is possible. Just materialize the
// immediate in a register as usual, so subsequent uses can leech off
// of it.
gpr.BindToRegister(a, (a == i) || (a == j));
ORN(gpr.R(a), gpr.R(i), gpr.R(j));
} }
if (inst.Rc) if (inst.Rc)
ComputeRC0(gpr.R(a)); ComputeRC0(gpr.R(a));