mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-10 16:19:28 +01:00
JitArm64: Allocate scratch registers inside EmitBackpatchRoutine
This cuts down on how much callers have to think about what registers EmitBackpatchRoutine is using. Also, by allocating registers dynamically instead of using a fixed set of registers, we improve codegen in cases where the fixed registers are taken but other registers are free. (These improvements don't apply to the emitting_routine == true case, where everything still works like before by necessity.)
This commit is contained in:
parent
9ab2751229
commit
527ad0b99b
@ -252,29 +252,40 @@ protected:
|
|||||||
//
|
//
|
||||||
// Registers used:
|
// Registers used:
|
||||||
//
|
//
|
||||||
// addr scratch
|
// addr
|
||||||
// Store: X2 X1
|
// Store: X2
|
||||||
// Load: X1
|
// Load: X1
|
||||||
// Zero 256: X1 X30
|
// Zero 256: X1
|
||||||
// Store float: X2 Q0
|
// Store float: X2
|
||||||
// Load float: X1
|
// Load float: X1
|
||||||
//
|
//
|
||||||
// If mode == AlwaysFastAccess, the addr argument can be any register.
|
// If mode == AlwaysFastAccess, the addr argument can be any register.
|
||||||
// Otherwise it must be the register listed in the table above.
|
// Otherwise it must be the register listed in the table above.
|
||||||
//
|
//
|
||||||
// Additional scratch registers are used in the following situations:
|
// This routine allocates most scratch registers dynamically, but in the following
|
||||||
|
// situations, specific scratch registers have to be allocated in advance:
|
||||||
//
|
//
|
||||||
// emitting_routine && mode == Auto: X0
|
// emitting_routine && mode == Auto: X0
|
||||||
|
// emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X1
|
||||||
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
|
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
|
||||||
// emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
|
// emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
|
||||||
// mode != AlwaysSlowAccess && !jo.fastmem: X0
|
// emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X0
|
||||||
// !emitting_routine && mode != AlwaysFastAccess && jo.memcheck &&
|
// emitting_routine && mode != AlwaysSlowAccess &&
|
||||||
// (flags & BackPatchInfo::FLAG_LOAD): X0
|
// (flags & BackPatchInfo::FLAG_STORE) && !(flags & BackPatchInfo::FLAG_FLOAT): X1
|
||||||
// !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
|
// emitting_routine && mode != AlwaysSlowAccess &&
|
||||||
|
// (flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT): Q0
|
||||||
|
// emitting_routine && mode != AlwaysSlowAccess &&
|
||||||
|
// (flags & BackPatchInfo::FLAG_ZERO_256): X30
|
||||||
// !emitting_routine && mode == Auto && jo.fastmem: X30
|
// !emitting_routine && mode == Auto && jo.fastmem: X30
|
||||||
//
|
//
|
||||||
// If there are any other registers that the caller doesn't mind being overwritten,
|
// If there are any other registers that the caller doesn't mind being overwritten,
|
||||||
// these can be indicated in scratch_gprs and scratch_fprs.
|
// these can be indicated in scratch_gprs and scratch_fprs.
|
||||||
|
//
|
||||||
|
// In the following situations, certain host registers must not contain guest registers:
|
||||||
|
//
|
||||||
|
// !emitting_routine && mode != AlwaysFastAccess && jo.memcheck: X30
|
||||||
|
// !emitting_routine && mode != AlwaysFastAccess && jo.memcheck &&
|
||||||
|
// (flags & BackPatchInfo::FLAG_LOAD): X0
|
||||||
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
|
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
|
||||||
Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0),
|
Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0),
|
||||||
BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false);
|
BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false);
|
||||||
|
@ -65,11 +65,140 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||||||
const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
|
const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
|
||||||
const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;
|
const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;
|
||||||
|
|
||||||
const BitSet32 gprs_to_push =
|
const bool memcheck = jo.memcheck && !emitting_routine;
|
||||||
|
|
||||||
|
BitSet32 temp_gpr_candidates = scratch_gprs;
|
||||||
|
BitSet32 temp_fpr_candidates = scratch_fprs;
|
||||||
|
temp_gpr_candidates[DecodeReg(addr)] = false;
|
||||||
|
if (flags & BackPatchInfo::FLAG_FLOAT)
|
||||||
|
temp_fpr_candidates[DecodeReg(RS)] = false;
|
||||||
|
else if (!(flags & BackPatchInfo::FLAG_ZERO_256))
|
||||||
|
temp_gpr_candidates[DecodeReg(RS)] = false;
|
||||||
|
if (!emitting_routine && mode == MemAccessMode::Auto && jo.fastmem)
|
||||||
|
temp_gpr_candidates[30] = true;
|
||||||
|
|
||||||
|
const auto allocate_temp_reg = [this](Arm64RegCache& reg_cache,
|
||||||
|
BitSet32& candidates) -> Arm64RegCache::ScopedARM64Reg {
|
||||||
|
for (int i : candidates)
|
||||||
|
{
|
||||||
|
candidates[i] = false;
|
||||||
|
ARM64Reg reg = ARM64Reg(i);
|
||||||
|
if (&reg_cache == &fpr)
|
||||||
|
reg = EncodeRegToQuad(reg);
|
||||||
|
return reg;
|
||||||
|
}
|
||||||
|
return reg_cache.GetScopedReg();
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto can_allocate_temp_reg_for_free = [](Arm64RegCache& reg_cache, BitSet32& candidates) {
|
||||||
|
return candidates != BitSet32{} || reg_cache.GetUnlockedRegisterCount() > 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
Arm64RegCache::ScopedARM64Reg temp_gpr_1;
|
||||||
|
Arm64RegCache::ScopedARM64Reg temp_gpr_2;
|
||||||
|
Arm64RegCache::ScopedARM64Reg temp_gpr_3;
|
||||||
|
Arm64RegCache::ScopedARM64Reg temp_fpr_1;
|
||||||
|
|
||||||
|
if (emit_fast_access)
|
||||||
|
{
|
||||||
|
if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
|
||||||
|
{
|
||||||
|
temp_fpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::Q0) :
|
||||||
|
allocate_temp_reg(fpr, temp_fpr_candidates);
|
||||||
|
scratch_fprs[DecodeReg(temp_fpr_1)] = true;
|
||||||
|
}
|
||||||
|
else if (flags & BackPatchInfo::FLAG_STORE)
|
||||||
|
{
|
||||||
|
temp_gpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W1) :
|
||||||
|
allocate_temp_reg(gpr, temp_gpr_candidates);
|
||||||
|
scratch_gprs[DecodeReg(temp_gpr_1)] = true;
|
||||||
|
}
|
||||||
|
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
||||||
|
{
|
||||||
|
temp_gpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W30) :
|
||||||
|
allocate_temp_reg(gpr, temp_gpr_candidates);
|
||||||
|
scratch_gprs[DecodeReg(temp_gpr_1)] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!jo.fastmem)
|
||||||
|
{
|
||||||
|
temp_gpr_2 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W0) :
|
||||||
|
allocate_temp_reg(gpr, temp_gpr_candidates);
|
||||||
|
temp_gpr_3 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W3) :
|
||||||
|
allocate_temp_reg(gpr, temp_gpr_candidates);
|
||||||
|
scratch_gprs[DecodeReg(temp_gpr_2)] = true;
|
||||||
|
scratch_gprs[DecodeReg(temp_gpr_3)] = true;
|
||||||
|
}
|
||||||
|
else if (emit_slow_access && emitting_routine)
|
||||||
|
{
|
||||||
|
temp_gpr_2 = ARM64Reg::W0;
|
||||||
|
temp_gpr_3 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
|
||||||
|
scratch_gprs[DecodeReg(temp_gpr_2)] = true;
|
||||||
|
scratch_gprs[DecodeReg(temp_gpr_3)] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setting memcheck_temp_gpr to W30 works, but because W30 is a register that needs to be pushed
|
||||||
|
// and popped, using W30 may require us to emit an extra push and pop instruction, depending on
|
||||||
|
// what other registers need pushing and popping. If we can find another register to use without
|
||||||
|
// having to evict anything from the register cache, let's do that instead of using W30.
|
||||||
|
ARM64Reg memcheck_temp_gpr = ARM64Reg::W30;
|
||||||
|
if (emit_slow_access && memcheck)
|
||||||
|
{
|
||||||
|
const auto is_suitable_as_memcheck_temp_gpr = [flags](ARM64Reg reg) {
|
||||||
|
return reg != ARM64Reg::INVALID_REG && reg != ARM64Reg::W30 &&
|
||||||
|
(reg != ARM64Reg::W0 || !(flags & BackPatchInfo::FLAG_LOAD));
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto get_unset_temp_gpr = [&]() -> Arm64RegCache::ScopedARM64Reg& {
|
||||||
|
if (temp_gpr_1 == ARM64Reg::INVALID_REG)
|
||||||
|
return temp_gpr_1;
|
||||||
|
if (temp_gpr_2 == ARM64Reg::INVALID_REG)
|
||||||
|
return temp_gpr_2;
|
||||||
|
ASSERT(temp_gpr_3 == ARM64Reg::INVALID_REG);
|
||||||
|
return temp_gpr_3;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (is_suitable_as_memcheck_temp_gpr(temp_gpr_1))
|
||||||
|
{
|
||||||
|
memcheck_temp_gpr = temp_gpr_1;
|
||||||
|
}
|
||||||
|
else if (is_suitable_as_memcheck_temp_gpr(temp_gpr_2))
|
||||||
|
{
|
||||||
|
memcheck_temp_gpr = temp_gpr_2;
|
||||||
|
}
|
||||||
|
else if (is_suitable_as_memcheck_temp_gpr(temp_gpr_3))
|
||||||
|
{
|
||||||
|
memcheck_temp_gpr = temp_gpr_3;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (can_allocate_temp_reg_for_free(gpr, temp_gpr_candidates))
|
||||||
|
{
|
||||||
|
Arm64RegCache::ScopedARM64Reg& temp_gpr_x = get_unset_temp_gpr();
|
||||||
|
temp_gpr_x = allocate_temp_reg(gpr, temp_gpr_candidates);
|
||||||
|
scratch_gprs[DecodeReg(temp_gpr_x)] = true;
|
||||||
|
if (is_suitable_as_memcheck_temp_gpr(temp_gpr_x))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (temp_fpr_1 == ARM64Reg::INVALID_REG &&
|
||||||
|
can_allocate_temp_reg_for_free(fpr, temp_fpr_candidates))
|
||||||
|
{
|
||||||
|
temp_fpr_1 = allocate_temp_reg(fpr, temp_fpr_candidates);
|
||||||
|
scratch_fprs[DecodeReg(temp_fpr_1)] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
BitSet32 gprs_to_push =
|
||||||
(emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs;
|
(emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs;
|
||||||
const BitSet32 fprs_to_push =
|
BitSet32 fprs_to_push =
|
||||||
(emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs;
|
(emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs;
|
||||||
|
|
||||||
|
if (!emitting_routine && mode == MemAccessMode::Auto && jo.fastmem)
|
||||||
|
gprs_to_push[30] = true;
|
||||||
|
|
||||||
bool in_far_code = false;
|
bool in_far_code = false;
|
||||||
const u8* fast_access_start = GetCodePtr();
|
const u8* fast_access_start = GetCodePtr();
|
||||||
std::optional<FixupBranch> slow_access_fixup;
|
std::optional<FixupBranch> slow_access_fixup;
|
||||||
@ -81,13 +210,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||||||
|
|
||||||
if (!jo.fastmem)
|
if (!jo.fastmem)
|
||||||
{
|
{
|
||||||
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
|
memory_base = EncodeRegTo64(temp_gpr_3);
|
||||||
|
memory_offset = temp_gpr_2;
|
||||||
|
|
||||||
memory_base = EncodeRegTo64(temp);
|
LSR(temp_gpr_3, addr, PowerPC::BAT_INDEX_SHIFT);
|
||||||
memory_offset = ARM64Reg::W0;
|
LDR(memory_base, MEM_REG, ArithOption(temp_gpr_3, true));
|
||||||
|
|
||||||
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
|
|
||||||
LDR(memory_base, MEM_REG, ArithOption(temp, true));
|
|
||||||
|
|
||||||
if (emit_slow_access)
|
if (emit_slow_access)
|
||||||
{
|
{
|
||||||
@ -100,15 +227,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||||||
}
|
}
|
||||||
else if (emit_slow_access && emitting_routine)
|
else if (emit_slow_access && emitting_routine)
|
||||||
{
|
{
|
||||||
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
|
slow_access_fixup = CheckIfSafeAddress(addr, temp_gpr_3, temp_gpr_2);
|
||||||
const ARM64Reg temp2 = ARM64Reg::W0;
|
|
||||||
|
|
||||||
slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
|
if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
|
||||||
{
|
{
|
||||||
ARM64Reg temp = ARM64Reg::D0;
|
ARM64Reg temp = EncodeRegToDouble(temp_fpr_1);
|
||||||
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
|
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
|
||||||
|
|
||||||
m_float_emit.STR(access_size, temp, memory_base, memory_offset);
|
m_float_emit.STR(access_size, temp, memory_base, memory_offset);
|
||||||
@ -122,7 +246,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||||||
}
|
}
|
||||||
else if (flags & BackPatchInfo::FLAG_STORE)
|
else if (flags & BackPatchInfo::FLAG_STORE)
|
||||||
{
|
{
|
||||||
ARM64Reg temp = ARM64Reg::W1;
|
ARM64Reg temp = temp_gpr_1;
|
||||||
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
|
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
|
||||||
|
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||||
@ -135,7 +259,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||||||
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
||||||
{
|
{
|
||||||
// This literally only stores 32bytes of zeros to the target address
|
// This literally only stores 32bytes of zeros to the target address
|
||||||
ARM64Reg temp = ARM64Reg::X30;
|
ARM64Reg temp = EncodeRegTo64(temp_gpr_1);
|
||||||
ADD(temp, memory_base, memory_offset);
|
ADD(temp, memory_base, memory_offset);
|
||||||
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
|
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
|
||||||
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
|
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
|
||||||
@ -156,8 +280,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||||||
|
|
||||||
if (emit_slow_access)
|
if (emit_slow_access)
|
||||||
{
|
{
|
||||||
const bool memcheck = jo.memcheck && !emitting_routine;
|
|
||||||
|
|
||||||
if (emit_fast_access)
|
if (emit_fast_access)
|
||||||
{
|
{
|
||||||
in_far_code = true;
|
in_far_code = true;
|
||||||
@ -174,12 +296,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||||||
if (slow_access_fixup)
|
if (slow_access_fixup)
|
||||||
SetJumpTarget(*slow_access_fixup);
|
SetJumpTarget(*slow_access_fixup);
|
||||||
|
|
||||||
const ARM64Reg temp_gpr = ARM64Reg::W1;
|
|
||||||
const int temp_gpr_index = DecodeReg(temp_gpr);
|
|
||||||
|
|
||||||
BitSet32 gprs_to_push_early = {};
|
BitSet32 gprs_to_push_early = {};
|
||||||
if (memcheck)
|
if (memcheck)
|
||||||
gprs_to_push_early[temp_gpr_index] = true;
|
gprs_to_push_early[DecodeReg(memcheck_temp_gpr)] = true;
|
||||||
if (flags & BackPatchInfo::FLAG_LOAD)
|
if (flags & BackPatchInfo::FLAG_LOAD)
|
||||||
gprs_to_push_early[0] = true;
|
gprs_to_push_early[0] = true;
|
||||||
|
|
||||||
@ -270,11 +389,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||||||
|
|
||||||
if (memcheck)
|
if (memcheck)
|
||||||
{
|
{
|
||||||
const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0;
|
|
||||||
const u64 early_push_count = (gprs_to_push & gprs_to_push_early).Count();
|
const u64 early_push_count = (gprs_to_push & gprs_to_push_early).Count();
|
||||||
const u64 early_push_size = Common::AlignUp(early_push_count, 2) * 8;
|
const u64 early_push_size = Common::AlignUp(early_push_count, 2) * 8;
|
||||||
|
|
||||||
WriteConditionalExceptionExit(EXCEPTION_DSI, temp_gpr, temp_fpr, early_push_size);
|
WriteConditionalExceptionExit(EXCEPTION_DSI, memcheck_temp_gpr, temp_fpr_1, early_push_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & BackPatchInfo::FLAG_LOAD)
|
if (flags & BackPatchInfo::FLAG_LOAD)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user