Merge pull request #13251 from Sintendo/carry-opts

JitArm64_Integer: Carry flag optimizations
This commit is contained in:
JosJuice 2025-01-06 10:39:43 +01:00 committed by GitHub
commit eec2e2f07a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 195 additions and 91 deletions

View File

@ -1128,6 +1128,43 @@ void JitArm64::addzex(UGeckoInstruction inst)
int a = inst.RA, d = inst.RD;
if (gpr.IsImm(a) && (gpr.GetImm(a) == 0 || HasConstantCarry()))
{
const u32 imm = gpr.GetImm(a);
const bool is_all_ones = imm == 0xFFFFFFFF;
switch (js.carryFlag)
{
case CarryFlag::InPPCState:
{
gpr.BindToRegister(d, false);
LDRB(IndexType::Unsigned, gpr.R(d), PPC_REG, PPCSTATE_OFF(xer_ca));
ComputeCarry(false);
break;
}
case CarryFlag::InHostCarry:
{
gpr.BindToRegister(d, false);
CSET(gpr.R(d), CCFlags::CC_CS);
ComputeCarry(false);
break;
}
case CarryFlag::ConstantTrue:
{
gpr.SetImmediate(d, imm + 1);
ComputeCarry(is_all_ones);
break;
}
case CarryFlag::ConstantFalse:
{
gpr.SetImmediate(d, imm);
ComputeCarry(false);
break;
}
}
}
else
{
switch (js.carryFlag)
{
case CarryFlag::InPPCState:
@ -1170,6 +1207,7 @@ void JitArm64::addzex(UGeckoInstruction inst)
break;
}
}
}
if (inst.Rc)
ComputeRC0(gpr.R(d));
@ -1216,40 +1254,62 @@ void JitArm64::subfex(UGeckoInstruction inst)
if (gpr.IsImm(a) && (mex || gpr.IsImm(b)))
{
u32 i = gpr.GetImm(a), j = mex ? -1 : gpr.GetImm(b);
gpr.BindToRegister(d, false);
const u32 i = gpr.GetImm(a);
const u32 j = mex ? -1 : gpr.GetImm(b);
const u32 imm = ~i + j;
const bool is_zero = imm == 0;
const bool is_all_ones = imm == 0xFFFFFFFF;
switch (js.carryFlag)
{
case CarryFlag::InPPCState:
{
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
if (is_zero)
{
LDRB(IndexType::Unsigned, RD, PPC_REG, PPCSTATE_OFF(xer_ca));
}
else
{
auto WA = gpr.GetScopedReg();
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d));
ADDI2R(RD, WA, imm, RD);
}
break;
}
case CarryFlag::InHostCarry:
{
auto WA = gpr.GetScopedReg();
MOVI2R(WA, ~i + j);
ADC(gpr.R(d), WA, ARM64Reg::WZR);
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
if (is_all_ones)
{
// RD = -1 + carry = carry ? 0 : -1
// CSETM sets the destination to -1 if the condition is true, 0
// otherwise. Hence, the condition must be carry clear.
CSETM(RD, CC_CC);
}
else
{
MOVI2R(RD, imm);
ADC(RD, RD, ARM64Reg::WZR);
}
break;
}
case CarryFlag::ConstantTrue:
{
gpr.SetImmediate(d, ~i + j + 1);
gpr.SetImmediate(d, imm + 1);
break;
}
case CarryFlag::ConstantFalse:
{
gpr.SetImmediate(d, ~i + j);
gpr.SetImmediate(d, imm);
break;
}
}
const bool must_have_carry = Interpreter::Helper_Carry(~i, j);
const bool might_have_carry = (~i + j) == 0xFFFFFFFF;
const bool might_have_carry = is_all_ones;
if (must_have_carry)
{
@ -1337,6 +1397,15 @@ void JitArm64::subfzex(UGeckoInstruction inst)
int a = inst.RA, d = inst.RD;
if (gpr.IsImm(a) && HasConstantCarry())
{
const u32 imm = ~gpr.GetImm(a);
const u32 carry = js.carryFlag == CarryFlag::ConstantTrue;
gpr.SetImmediate(d, imm + carry);
ComputeCarry(Interpreter::Helper_Carry(imm, carry));
}
else
{
gpr.BindToRegister(d, d == a);
switch (js.carryFlag)
@ -1371,6 +1440,7 @@ void JitArm64::subfzex(UGeckoInstruction inst)
break;
}
}
}
if (inst.Rc)
ComputeRC0(gpr.R(d));
@ -1436,40 +1506,66 @@ void JitArm64::addex(UGeckoInstruction inst)
if (gpr.IsImm(a) && (mex || gpr.IsImm(b)))
{
u32 i = gpr.GetImm(a), j = mex ? -1 : gpr.GetImm(b);
gpr.BindToRegister(d, false);
const u32 i = gpr.GetImm(a), j = mex ? -1 : gpr.GetImm(b);
const u32 imm = i + j;
const bool is_zero = imm == 0;
const bool is_all_ones = imm == 0xFFFFFFFF;
switch (js.carryFlag)
{
case CarryFlag::InPPCState:
{
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
if (is_zero)
{
LDRB(IndexType::Unsigned, RD, PPC_REG, PPCSTATE_OFF(xer_ca));
}
else
{
auto WA = gpr.GetScopedReg();
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDI2R(gpr.R(d), WA, i + j, gpr.R(d));
ADDI2R(RD, WA, imm, RD);
}
break;
}
case CarryFlag::InHostCarry:
{
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
MOVI2R(RD, i + j);
if (is_zero)
{
// RD = 0 + carry = carry ? 1 : 0
CSET(RD, CC_CS);
}
else if (is_all_ones)
{
// RD = -1 + carry = carry ? 0 : -1
// Note that CSETM sets the destination to -1 if the condition is true,
// and 0 otherwise. Hence, the condition must be carry clear.
CSETM(RD, CC_CC);
}
else
{
MOVI2R(RD, imm);
ADC(RD, RD, ARM64Reg::WZR);
}
break;
}
case CarryFlag::ConstantTrue:
{
gpr.SetImmediate(d, i + j + 1);
gpr.SetImmediate(d, imm + 1);
break;
}
case CarryFlag::ConstantFalse:
{
gpr.SetImmediate(d, i + j);
gpr.SetImmediate(d, imm);
break;
}
}
const bool must_have_carry = Interpreter::Helper_Carry(i, j);
const bool might_have_carry = (i + j) == 0xFFFFFFFF;
const bool might_have_carry = is_all_ones;
if (must_have_carry)
{

View File

@ -110,7 +110,7 @@ JitBase::~JitBase()
CPUThreadConfigCallback::RemoveConfigChangedCallback(m_registered_config_callback_id);
}
bool JitBase::DoesConfigNeedRefresh()
bool JitBase::DoesConfigNeedRefresh() const
{
return std::ranges::any_of(JIT_SETTINGS, [this](const auto& pair) {
return this->*pair.first != Config::Get(*pair.second);
@ -276,7 +276,7 @@ bool JitBase::CanMergeNextInstructions(int count) const
return true;
}
bool JitBase::ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op)
bool JitBase::ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op) const
{
if (jo.fp_exceptions)
return (op->opinfo->flags & FL_FLOAT_EXCEPTION) != 0;

View File

@ -167,7 +167,7 @@ protected:
static const std::array<std::pair<bool JitBase::*, const Config::Info<bool>*>, 23> JIT_SETTINGS;
bool DoesConfigNeedRefresh();
bool DoesConfigNeedRefresh() const;
void RefreshConfig();
void InitFastmemArena();
@ -178,8 +178,16 @@ protected:
void CleanUpAfterStackFault();
bool CanMergeNextInstructions(int count) const;
// Returns true when js.carryFlag is CarryFlag::ConstantTrue or CarryFlag::ConstantFalse.
// When _M_ARM_64 is not defined this always returns false.
bool HasConstantCarry() const
{
#ifdef _M_ARM_64
  const auto carry_state = js.carryFlag;
  const bool is_constant_true = carry_state == CarryFlag::ConstantTrue;
  const bool is_constant_false = carry_state == CarryFlag::ConstantFalse;
  return is_constant_true || is_constant_false;
#else
  return false;
#endif
}
bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op);
bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op) const;
public:
explicit JitBase(Core::System& system);