mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-09 15:49:25 +01:00
Merge pull request #9458 from JosJuice/arm-fpu-round
JitArm64: Set flush-to-zero/rounding mode and improve float/double conversion accuracy
This commit is contained in:
commit
5da85f3a25
@ -3601,6 +3601,14 @@ void ARM64FloatEmitter::FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xE, Rd, Rn);
|
||||
}
|
||||
// Emits FACGE (floating-point absolute compare, greater than or equal) with Rn and Rm as the
// operands and Rd as the destination.
// `size` is the element width in bits; only its bit 6 (set for 64) is forwarded to the encoder.
void ARM64FloatEmitter::FACGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
  const int size_field = size >> 6;
  EmitThreeSame(1, size_field, 0x1D, Rd, Rn, Rm);
}
|
||||
// Emits FACGT (floating-point absolute compare, greater than) with Rn and Rm as the operands and
// Rd as the destination.
// `size` is the element width in bits; its bit 6 (set for 64) is combined with the constant 2 to
// form the size field handed to the encoder.
void ARM64FloatEmitter::FACGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
  const int size_field = 2 | (size >> 6);
  EmitThreeSame(1, size_field, 0x1D, Rd, Rn, Rm);
}
|
||||
|
||||
void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
|
||||
{
|
||||
|
@ -1094,6 +1094,8 @@ public:
|
||||
void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FACGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void FACGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
|
||||
// Conditional select
|
||||
void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
|
||||
|
@ -69,6 +69,7 @@ void CPUInfo::Detect()
|
||||
CPU64bit = true;
|
||||
Mode64bit = true;
|
||||
vendor = CPUVendor::ARM;
|
||||
bFlushToZero = true;
|
||||
|
||||
#ifdef _WIN32
|
||||
num_cores = std::thread::hardware_concurrency();
|
||||
|
78
Source/Core/Common/ArmFPURoundMode.cpp
Normal file
78
Source/Core/Common/ArmFPURoundMode.cpp
Normal file
@ -0,0 +1,78 @@
|
||||
// Copyright 2021 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FPURoundMode.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
// Returns the current value of the FPCR system register.
static u64 GetFPCR()
{
#ifndef _MSC_VER
  // GCC/Clang: read the register with an MRS instruction
  u64 value;
  __asm__ __volatile__("mrs %0, fpcr" : "=r"(value));
  return value;
#else
  // MSVC has no inline assembly on this target, so use the intrinsic
  return _ReadStatusReg(ARM64_FPCR);
#endif
}
|
||||
|
||||
// Writes `fpcr` to the FPCR system register.
static void SetFPCR(u64 fpcr)
{
#ifdef _MSC_VER
  _WriteStatusReg(ARM64_FPCR, fpcr);
#else
  // MSR to a system register only takes a general-purpose register as its source operand, so the
  // input constraint must be "r". Also allowing "i" would let the compiler substitute an
  // immediate when the argument is a known constant, which the assembler cannot encode.
  __asm__ __volatile__("msr fpcr, %0" : : "r"(fpcr));
#endif
}
|
||||
|
||||
namespace FPURoundMode
|
||||
{
|
||||
static const u64 default_fpcr = GetFPCR();
|
||||
static u64 saved_fpcr = default_fpcr;
|
||||
|
||||
void SetRoundMode(int mode)
|
||||
{
|
||||
// We don't need to do anything here since SetSIMDMode is always called after calling this
|
||||
}
|
||||
|
||||
void SetPrecisionMode(PrecisionMode mode)
|
||||
{
|
||||
}
|
||||
|
||||
void SetSIMDMode(int rounding_mode, bool non_ieee_mode)
|
||||
{
|
||||
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
|
||||
constexpr u32 FZ = 1 << 24;
|
||||
|
||||
// lookup table for FPSCR.RN-to-FPCR.RMode translation
|
||||
constexpr u32 rounding_mode_table[] = {
|
||||
(0 << 22), // nearest
|
||||
(3 << 22), // zero
|
||||
(1 << 22), // +inf
|
||||
(2 << 22), // -inf
|
||||
};
|
||||
|
||||
const u64 base = default_fpcr & ~(0b111 << 22);
|
||||
SetFPCR(base | rounding_mode_table[rounding_mode] | (non_ieee_mode ? FZ : 0));
|
||||
}
|
||||
|
||||
void SaveSIMDState()
|
||||
{
|
||||
saved_fpcr = GetFPCR();
|
||||
}
|
||||
|
||||
void LoadSIMDState()
|
||||
{
|
||||
SetFPCR(saved_fpcr);
|
||||
}
|
||||
|
||||
void LoadDefaultSIMDState()
|
||||
{
|
||||
SetFPCR(default_fpcr);
|
||||
}
|
||||
|
||||
} // namespace FPURoundMode
|
@ -199,7 +199,7 @@ if(_M_ARM_64)
|
||||
Arm64Emitter.h
|
||||
ArmCommon.h
|
||||
ArmCPUDetect.cpp
|
||||
GenericFPURoundMode.cpp
|
||||
ArmFPURoundMode.cpp
|
||||
)
|
||||
else()
|
||||
if(_M_X86) #X86
|
||||
|
@ -982,6 +982,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||
|
||||
js.compilerPC = op.address;
|
||||
js.op = &op;
|
||||
js.fpr_is_store_safe = op.fprIsStoreSafeBeforeInst;
|
||||
js.instructionNumber = i;
|
||||
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
|
||||
const GekkoOPInfo* opinfo = op.opinfo;
|
||||
@ -1118,6 +1119,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||
|
||||
CompileInstruction(op);
|
||||
|
||||
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
||||
|
||||
if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
|
||||
{
|
||||
// If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
|
||||
|
@ -105,7 +105,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
||||
|
||||
if (single)
|
||||
{
|
||||
if (js.op->fprIsStoreSafe[s])
|
||||
if (js.fpr_is_store_safe[s])
|
||||
{
|
||||
RCOpArg Rs = fpr.Use(s, RCMode::Read);
|
||||
RegCache::Realize(Rs);
|
||||
|
@ -695,6 +695,7 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||
|
||||
js.compilerPC = op.address;
|
||||
js.op = &op;
|
||||
js.fpr_is_store_safe = op.fprIsStoreSafeBeforeInst;
|
||||
js.instructionNumber = i;
|
||||
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
|
||||
const GekkoOPInfo* opinfo = op.opinfo;
|
||||
@ -830,6 +831,9 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||
}
|
||||
|
||||
CompileInstruction(op);
|
||||
|
||||
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
|
||||
|
||||
if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
|
||||
FlushCarry();
|
||||
|
||||
|
@ -152,7 +152,20 @@ public:
|
||||
void psq_l(UGeckoInstruction inst);
|
||||
void psq_st(UGeckoInstruction inst);
|
||||
|
||||
private:
|
||||
void ConvertDoubleToSingleLower(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
|
||||
Arm64Gen::ARM64Reg src_reg);
|
||||
void ConvertDoubleToSinglePair(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
|
||||
Arm64Gen::ARM64Reg src_reg);
|
||||
void ConvertSingleToDoubleLower(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
|
||||
Arm64Gen::ARM64Reg src_reg,
|
||||
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||
void ConvertSingleToDoublePair(size_t guest_reg, Arm64Gen::ARM64Reg dest_reg,
|
||||
Arm64Gen::ARM64Reg src_reg,
|
||||
Arm64Gen::ARM64Reg scratch_reg = Arm64Gen::ARM64Reg::INVALID_REG);
|
||||
|
||||
bool IsFPRStoreSafe(size_t guest_reg) const;
|
||||
|
||||
protected:
|
||||
struct SlowmemHandler
|
||||
{
|
||||
Arm64Gen::ARM64Reg dest_reg;
|
||||
@ -184,14 +197,18 @@ private:
|
||||
nearcode = GetWritableCodePtr();
|
||||
SetCodePtrUnsafe(farcode.GetWritableCodePtr());
|
||||
AlignCode16();
|
||||
m_in_farcode = true;
|
||||
}
|
||||
|
||||
void SwitchToNearCode()
|
||||
{
|
||||
farcode.SetCodePtrUnsafe(GetWritableCodePtr());
|
||||
SetCodePtrUnsafe(nearcode);
|
||||
m_in_farcode = false;
|
||||
}
|
||||
|
||||
// Reports whether the far-code flag (m_in_farcode) is currently set.
bool IsInFarCode() const
{
  return m_in_farcode;
}
|
||||
|
||||
// Dump a memory range of code
|
||||
void DumpCode(const u8* start, const u8* end);
|
||||
|
||||
@ -215,6 +232,9 @@ private:
|
||||
// AsmRoutines
|
||||
void GenerateAsm();
|
||||
void GenerateCommonAsm();
|
||||
void GenerateConvertDoubleToSingle();
|
||||
void GenerateConvertSingleToDouble();
|
||||
void GenerateQuantizedLoadStores();
|
||||
|
||||
// Profiling
|
||||
void BeginTimeProfile(JitBlock* b);
|
||||
@ -254,6 +274,7 @@ private:
|
||||
|
||||
Arm64Gen::ARM64CodeBlock farcode;
|
||||
u8* nearcode; // Backed up when we switch to far code.
|
||||
bool m_in_farcode = false;
|
||||
|
||||
bool m_enable_blr_optimization;
|
||||
bool m_cleanup_after_stackfault = false;
|
||||
|
@ -61,23 +61,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||
if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT)
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||
{
|
||||
m_float_emit.FCVT(32, 64, ARM64Reg::D0, RS);
|
||||
m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
|
||||
m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
|
||||
{
|
||||
m_float_emit.REV32(8, ARM64Reg::D0, RS);
|
||||
m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
|
||||
{
|
||||
m_float_emit.FCVTN(32, ARM64Reg::D0, RS);
|
||||
m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
|
||||
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
|
||||
{
|
||||
m_float_emit.REV32(8, ARM64Reg::D0, RS);
|
||||
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
|
||||
@ -184,37 +172,22 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||
if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT)
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||
{
|
||||
m_float_emit.FCVT(32, 64, ARM64Reg::D0, RS);
|
||||
m_float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0);
|
||||
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
|
||||
BLR(ARM64Reg::X8);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
|
||||
{
|
||||
m_float_emit.UMOV(32, ARM64Reg::W0, RS, 0);
|
||||
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
|
||||
BLR(ARM64Reg::X8);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
|
||||
{
|
||||
m_float_emit.FCVTN(32, ARM64Reg::D0, RS);
|
||||
m_float_emit.UMOV(64, ARM64Reg::X0, ARM64Reg::D0, 0);
|
||||
ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
|
||||
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
|
||||
BLR(ARM64Reg::X8);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
|
||||
{
|
||||
m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
|
||||
ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
|
||||
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
|
||||
ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
|
||||
BLR(ARM64Reg::X8);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
|
||||
m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
|
||||
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
|
||||
BLR(ARM64Reg::X8);
|
||||
}
|
||||
}
|
||||
|
@ -220,30 +220,28 @@ void JitArm64::fselx(UGeckoInstruction inst)
|
||||
const u32 c = inst.FC;
|
||||
const u32 d = inst.FD;
|
||||
|
||||
const bool a_single = fpr.IsSingle(a, true);
|
||||
if (a_single)
|
||||
{
|
||||
const ARM64Reg VA = fpr.R(a, RegType::LowerPairSingle);
|
||||
m_float_emit.FCMPE(EncodeRegToSingle(VA));
|
||||
}
|
||||
else
|
||||
{
|
||||
const ARM64Reg VA = fpr.R(a, RegType::LowerPair);
|
||||
m_float_emit.FCMPE(EncodeRegToDouble(VA));
|
||||
}
|
||||
const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
|
||||
const RegType b_and_c_type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair;
|
||||
const auto b_and_c_reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble;
|
||||
|
||||
const bool a_single = fpr.IsSingle(a, true) && (b_and_c_singles || (a != b && a != c));
|
||||
const RegType a_type = a_single ? RegType::LowerPairSingle : RegType::LowerPair;
|
||||
const auto a_reg_encoder = a_single ? EncodeRegToSingle : EncodeRegToDouble;
|
||||
|
||||
const ARM64Reg VA = fpr.R(a, a_type);
|
||||
const ARM64Reg VB = fpr.R(b, b_and_c_type);
|
||||
const ARM64Reg VC = fpr.R(c, b_and_c_type);
|
||||
|
||||
// If a == d, the RW call below may change the type of a to double. This is okay, because the
|
||||
// actual value in the register is not altered by RW. So let's just assert before calling RW.
|
||||
ASSERT_MSG(DYNA_REC, a_single == fpr.IsSingle(a, true),
|
||||
"Register allocation turned singles into doubles in the middle of fselx");
|
||||
|
||||
const bool b_and_c_singles = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
|
||||
const RegType type = b_and_c_singles ? RegType::LowerPairSingle : RegType::LowerPair;
|
||||
const auto reg_encoder = b_and_c_singles ? EncodeRegToSingle : EncodeRegToDouble;
|
||||
const ARM64Reg VD = fpr.RW(d, b_and_c_type);
|
||||
|
||||
const ARM64Reg VB = fpr.R(b, type);
|
||||
const ARM64Reg VC = fpr.R(c, type);
|
||||
const ARM64Reg VD = fpr.RW(d, type);
|
||||
|
||||
m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE);
|
||||
m_float_emit.FCMPE(a_reg_encoder(VA));
|
||||
m_float_emit.FCSEL(b_and_c_reg_encoder(VD), b_and_c_reg_encoder(VC), b_and_c_reg_encoder(VB),
|
||||
CC_GE);
|
||||
|
||||
ASSERT_MSG(DYNA_REC, b_and_c_singles == (fpr.IsSingle(b, true) && fpr.IsSingle(c, true)),
|
||||
"Register allocation turned singles into doubles in the middle of fselx");
|
||||
@ -260,7 +258,7 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
||||
const u32 d = inst.FD;
|
||||
|
||||
const bool single = fpr.IsSingle(b, true);
|
||||
if (single)
|
||||
if (single && js.fpr_is_store_safe[b])
|
||||
{
|
||||
// Source is already in single precision, so no need to do anything but to copy to PSR1.
|
||||
const ARM64Reg VB = fpr.R(b, RegType::LowerPairSingle);
|
||||
@ -268,6 +266,9 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
||||
|
||||
if (b != d)
|
||||
m_float_emit.FMOV(EncodeRegToSingle(VD), EncodeRegToSingle(VB));
|
||||
|
||||
ASSERT_MSG(DYNA_REC, fpr.IsSingle(b, true),
|
||||
"Register allocation turned singles into doubles in the middle of frspx");
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -276,9 +277,6 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
||||
|
||||
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
||||
}
|
||||
|
||||
ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
|
||||
"Register allocation turned singles into doubles in the middle of frspx");
|
||||
}
|
||||
|
||||
void JitArm64::fcmpX(UGeckoInstruction inst)
|
||||
@ -386,3 +384,196 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
|
||||
ASSERT_MSG(DYNA_REC, b == d || single == fpr.IsSingle(b, true),
|
||||
"Register allocation turned singles into doubles in the middle of fctiwzx");
|
||||
}
|
||||
|
||||
// Since the following float conversion functions are used in non-arithmetic PPC float
|
||||
// instructions, they must convert floats bitexact and never flush denormals to zero or turn SNaNs
|
||||
// into QNaNs. This means we can't just use FCVT/FCVTL/FCVTN.
|
||||
|
||||
// Emits code converting the double in element 0 of src_reg to a single placed in element 0 of
// dest_reg. guest_reg selects the entry of js.fpr_is_store_safe that decides between a plain
// FCVT and a call to the cdts routine.
void JitArm64::ConvertDoubleToSingleLower(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg)
{
  if (!js.fpr_is_store_safe[guest_reg])
  {
    FlushCarry();

    // Of X0-X3 and X30, preserve the ones the register cache reports as in use
    // (presumably these are what the call can clobber -- confirm against the cdts routine)
    const BitSet32 live_gprs = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
    ABI_PushRegisters(live_gprs);

    // The raw 64 bits go in through X0 and the 32-bit result is picked up from W1
    m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 0);
    BL(cdts);
    m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W1);

    ABI_PopRegisters(live_gprs);
    return;
  }

  // Store-safe register: a single FCVT is enough
  m_float_emit.FCVT(32, 64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
}
|
||||
|
||||
// Emits code converting both doubles in src_reg to singles placed in elements 0 and 1 of
// dest_reg. guest_reg selects the entry of js.fpr_is_store_safe that decides between a plain
// FCVTN and two calls to the cdts routine.
void JitArm64::ConvertDoubleToSinglePair(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg)
{
  if (!js.fpr_is_store_safe[guest_reg])
  {
    FlushCarry();

    // Of X0-X3 and X30, preserve the ones the register cache reports as in use
    const BitSet32 live_gprs = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30};
    ABI_PushRegisters(live_gprs);

    // Element 0 first: raw bits in through X0, converted value back out of W1
    m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 0);
    BL(cdts);
    m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W1);

    // Then element 1, in exactly the same way
    m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 1);
    BL(cdts);
    m_float_emit.INS(32, dest_reg, 1, ARM64Reg::W1);

    ABI_PopRegisters(live_gprs);
    return;
  }

  // Store-safe register: a single FCVTN handles both elements
  m_float_emit.FCVTN(32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
}
|
||||
|
||||
// Emits code converting the single in element 0 of src_reg to a double placed in element 0 of
// dest_reg.
//
// guest_reg   - index into js.fpr_is_store_safe; a store-safe register gets just an FCVT.
// scratch_reg - optional temporary, must differ from src_reg. When it is a valid register, a
//               runtime check is emitted so that only inputs failing the check reach the
//               bit-exact cstd routine; when it is INVALID_REG, cstd is always called.
void JitArm64::ConvertSingleToDoubleLower(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg,
                                          ARM64Reg scratch_reg)
{
  ASSERT(scratch_reg != src_reg);

  if (js.fpr_is_store_safe[guest_reg])
  {
    m_float_emit.FCVT(64, 32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
    return;
  }

  const bool have_scratch = scratch_reg != ARM64Reg::INVALID_REG;
  // Only move the slow path out of line if we are not already emitting far code
  const bool use_farcode = !IsInFarCode();

  FlushCarry();

  // Runtime question: is the input known not to be NaN, and is it either not denormal or is
  // FPCR.FZ not set? (Unfortunately, zeroes fail this check as well.)
  FixupBranch to_fast_path;
  if (have_scratch)
  {
    const ARM64Reg scratch_single = EncodeRegToSingle(scratch_reg);
    m_float_emit.FABS(scratch_single, EncodeRegToSingle(src_reg));
    m_float_emit.FCMP(scratch_single);
    to_fast_path = B(CCFlags::CC_GT);

    if (use_farcode)
    {
      FixupBranch to_slow_path = B();

      SwitchToFarCode();
      SetJumpTarget(to_slow_path);
    }
  }

  // Slow path (also the only path without a scratch register): call the bit-exact routine.
  // Of X0-X4 and X30, preserve the ones the register cache reports as in use.
  const BitSet32 live_gprs = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 4, 30};
  ABI_PushRegisters(live_gprs);

  // The raw 32 bits go in through W0 and the 64-bit result comes back in X0
  m_float_emit.UMOV(32, ARM64Reg::W0, src_reg, 0);
  BL(cstd);
  m_float_emit.INS(64, dest_reg, 0, ARM64Reg::X0);

  ABI_PopRegisters(live_gprs);

  // Fast path: the check passed, so a plain FCVT is good enough
  if (have_scratch)
  {
    FixupBranch slow_path_done = B();

    if (use_farcode)
      SwitchToNearCode();

    SetJumpTarget(to_fast_path);

    m_float_emit.FCVT(64, 32, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));

    SetJumpTarget(slow_path_done);
  }
}
|
||||
|
||||
// Emits code converting the two singles in elements 0 and 1 of src_reg to doubles placed in
// elements 0 and 1 of dest_reg.
//
// guest_reg   - index into js.fpr_is_store_safe; a store-safe register gets just an FCVTL.
// scratch_reg - optional temporary, must differ from src_reg. When it is a valid register, a
//               runtime check is emitted so that only inputs failing the check reach the
//               bit-exact cstd routine; when it is INVALID_REG, cstd is always called.
void JitArm64::ConvertSingleToDoublePair(size_t guest_reg, ARM64Reg dest_reg, ARM64Reg src_reg,
                                         ARM64Reg scratch_reg)
{
  ASSERT(scratch_reg != src_reg);

  if (js.fpr_is_store_safe[guest_reg])
  {
    m_float_emit.FCVTL(64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));
    return;
  }

  const bool have_scratch = scratch_reg != ARM64Reg::INVALID_REG;
  // Only move the slow path out of line if we are not already emitting far code
  const bool use_farcode = !IsInFarCode();

  FlushCarry();

  // Runtime question: is neither input NaN, and is neither input denormal or is FPCR.FZ not set?
  // (Unfortunately, zeroes fail this check as well.)
  FixupBranch to_fast_path;
  if (have_scratch)
  {
    const ARM64Reg scratch_double = EncodeRegToDouble(scratch_reg);

    // Each 32-bit element of the scratch register becomes 0xFFFF'FFFF if the absolute value of
    // the matching element of src_reg compares greater than 0, and 0x0000'0000 otherwise
    m_float_emit.MOVI(8, scratch_double, 0);
    m_float_emit.FACGT(32, scratch_double, EncodeRegToDouble(src_reg), scratch_double);

    // Copy byte 0 over byte 7, so that the 64-bit pattern is a NaN only when both halves are set:
    // 0x0000'0000'0000'0000 (zero)     -> 0x0000'0000'0000'0000 (zero)
    // 0x0000'0000'FFFF'FFFF (denormal) -> 0xFF00'0000'FFFF'FFFF (normal)
    // 0xFFFF'FFFF'0000'0000 (NaN)      -> 0x00FF'FFFF'0000'0000 (normal)
    // 0xFFFF'FFFF'FFFF'FFFF (NaN)      -> 0xFFFF'FFFF'FFFF'FFFF (NaN)
    m_float_emit.INS(8, scratch_double, 7, scratch_double, 0);

    // The compare comes out unordered only for the all-ones NaN pattern
    m_float_emit.FCMP(scratch_double);
    to_fast_path = B(CCFlags::CC_VS);

    if (use_farcode)
    {
      FixupBranch to_slow_path = B();

      SwitchToFarCode();
      SetJumpTarget(to_slow_path);
    }
  }

  // Slow path (also the only path without a scratch register): call the bit-exact routine.
  // Of X0-X4 and X30, preserve the ones the register cache reports as in use.
  const BitSet32 live_gprs = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 4, 30};
  ABI_PushRegisters(live_gprs);

  // Element 1 is converted before element 0 (presumably so that dest_reg may alias src_reg:
  // writing the upper double leaves the lower single untouched -- confirm with callers)
  m_float_emit.UMOV(32, ARM64Reg::W0, src_reg, 1);
  BL(cstd);
  m_float_emit.INS(64, dest_reg, 1, ARM64Reg::X0);

  m_float_emit.UMOV(32, ARM64Reg::W0, src_reg, 0);
  BL(cstd);
  m_float_emit.INS(64, dest_reg, 0, ARM64Reg::X0);

  ABI_PopRegisters(live_gprs);

  // Fast path: the check passed, so a plain FCVTL is good enough
  if (have_scratch)
  {
    FixupBranch slow_path_done = B();

    if (use_farcode)
      SwitchToNearCode();

    SetJumpTarget(to_fast_path);
    m_float_emit.FCVTL(64, EncodeRegToDouble(dest_reg), EncodeRegToDouble(src_reg));

    SetJumpTarget(slow_path_done);
  }
}
|
||||
|
||||
// Returns the js.fpr_is_store_safe entry for guest_reg, i.e. the flag the float conversion
// helpers use to choose between a plain conversion instruction and the bit-exact routines.
bool JitArm64::IsFPRStoreSafe(size_t guest_reg) const
{
  const bool store_safe = js.fpr_is_store_safe[guest_reg];
  return store_safe;
}
|
||||
|
@ -189,6 +189,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
|
||||
u32 a = inst.RA, b = inst.RB;
|
||||
|
||||
bool want_single = false;
|
||||
s32 offset = inst.SIMM_16;
|
||||
u32 flags = BackPatchInfo::FLAG_STORE;
|
||||
bool update = false;
|
||||
@ -200,10 +201,12 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 663: // stfsx
|
||||
want_single = true;
|
||||
flags |= BackPatchInfo::FLAG_SIZE_F32;
|
||||
offset_reg = b;
|
||||
break;
|
||||
case 695: // stfsux
|
||||
want_single = true;
|
||||
flags |= BackPatchInfo::FLAG_SIZE_F32;
|
||||
update = true;
|
||||
offset_reg = b;
|
||||
@ -218,16 +221,19 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
offset_reg = b;
|
||||
break;
|
||||
case 983: // stfiwx
|
||||
flags |= BackPatchInfo::FLAG_SIZE_F32I;
|
||||
// This instruction writes the lower 32 bits of a double. want_single must be false
|
||||
flags |= BackPatchInfo::FLAG_SIZE_F32;
|
||||
offset_reg = b;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 53: // stfsu
|
||||
want_single = true;
|
||||
flags |= BackPatchInfo::FLAG_SIZE_F32;
|
||||
update = true;
|
||||
break;
|
||||
case 52: // stfs
|
||||
want_single = true;
|
||||
flags |= BackPatchInfo::FLAG_SIZE_F32;
|
||||
break;
|
||||
case 55: // stfdu
|
||||
@ -242,19 +248,22 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
u32 imm_addr = 0;
|
||||
bool is_immediate = false;
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
fpr.Lock(ARM64Reg::Q0);
|
||||
|
||||
const bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true);
|
||||
const bool have_single = fpr.IsSingle(inst.FS, true);
|
||||
|
||||
const ARM64Reg V0 = fpr.R(inst.FS, single ? RegType::LowerPairSingle : RegType::LowerPair);
|
||||
ARM64Reg V0 =
|
||||
fpr.R(inst.FS, want_single && have_single ? RegType::LowerPairSingle : RegType::LowerPair);
|
||||
|
||||
if (single)
|
||||
if (want_single && !have_single)
|
||||
{
|
||||
flags &= ~BackPatchInfo::FLAG_SIZE_F32;
|
||||
flags |= BackPatchInfo::FLAG_SIZE_F32I;
|
||||
const ARM64Reg single_reg = fpr.GetReg();
|
||||
ConvertDoubleToSingleLower(inst.FS, single_reg, V0);
|
||||
V0 = single_reg;
|
||||
}
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
|
||||
ARM64Reg addr_reg = ARM64Reg::W1;
|
||||
|
||||
if (update)
|
||||
@ -359,19 +368,11 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
accessSize = 32;
|
||||
|
||||
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F64)
|
||||
{
|
||||
m_float_emit.REV64(8, ARM64Reg::Q0, V0);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||
{
|
||||
m_float_emit.FCVT(32, 64, ARM64Reg::D0, EncodeRegToDouble(V0));
|
||||
m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
|
||||
{
|
||||
m_float_emit.REV32(8, ARM64Reg::D0, V0);
|
||||
}
|
||||
|
||||
m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0,
|
||||
ARM64Reg::X0, accessSize >> 3);
|
||||
@ -399,6 +400,10 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
||||
{
|
||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use);
|
||||
}
|
||||
|
||||
if (want_single && !have_single)
|
||||
fpr.Unlock(V0);
|
||||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0);
|
||||
}
|
||||
|
@ -116,13 +116,44 @@ void JitArm64::psq_st(UGeckoInstruction inst)
|
||||
const bool update = inst.OPCD == 61;
|
||||
const s32 offset = inst.SIMM_12;
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
|
||||
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
|
||||
|
||||
const bool single = fpr.IsSingle(inst.RS);
|
||||
const bool have_single = fpr.IsSingle(inst.RS);
|
||||
|
||||
ARM64Reg VS = fpr.R(inst.RS, have_single ? RegType::Single : RegType::Register);
|
||||
|
||||
if (js.assumeNoPairedQuantize)
|
||||
{
|
||||
if (!have_single)
|
||||
{
|
||||
const ARM64Reg single_reg = fpr.GetReg();
|
||||
|
||||
if (inst.W)
|
||||
m_float_emit.FCVT(32, 64, EncodeRegToDouble(single_reg), EncodeRegToDouble(VS));
|
||||
else
|
||||
m_float_emit.FCVTN(32, EncodeRegToDouble(single_reg), EncodeRegToDouble(VS));
|
||||
|
||||
VS = single_reg;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (have_single)
|
||||
{
|
||||
m_float_emit.ORR(ARM64Reg::D0, VS, VS);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inst.W)
|
||||
m_float_emit.FCVT(32, 64, ARM64Reg::D0, VS);
|
||||
else
|
||||
m_float_emit.FCVTN(32, ARM64Reg::D0, VS);
|
||||
}
|
||||
}
|
||||
|
||||
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
|
||||
|
||||
const ARM64Reg arm_addr = gpr.R(inst.RA);
|
||||
const ARM64Reg VS = fpr.R(inst.RS, single ? RegType::Single : RegType::Register);
|
||||
|
||||
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
|
||||
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
|
||||
@ -157,28 +188,13 @@ void JitArm64::psq_st(UGeckoInstruction inst)
|
||||
{
|
||||
u32 flags = BackPatchInfo::FLAG_STORE;
|
||||
|
||||
if (single)
|
||||
flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32I : BackPatchInfo::FLAG_SIZE_F32X2I);
|
||||
else
|
||||
flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2);
|
||||
flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2);
|
||||
|
||||
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use,
|
||||
fprs_in_use);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (single)
|
||||
{
|
||||
m_float_emit.ORR(ARM64Reg::D0, VS, VS);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inst.W)
|
||||
m_float_emit.FCVT(32, 64, ARM64Reg::D0, VS);
|
||||
else
|
||||
m_float_emit.FCVTN(32, ARM64Reg::D0, VS);
|
||||
}
|
||||
|
||||
LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + inst.I));
|
||||
UBFM(type_reg, scale_reg, 0, 2); // Type
|
||||
UBFM(scale_reg, scale_reg, 8, 13); // Scale
|
||||
@ -212,6 +228,9 @@ void JitArm64::psq_st(UGeckoInstruction inst)
|
||||
SetJumpTarget(continue1);
|
||||
}
|
||||
|
||||
if (js.assumeNoPairedQuantize && !have_single)
|
||||
fpr.Unlock(VS);
|
||||
|
||||
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
|
||||
}
|
||||
|
@ -17,9 +17,10 @@
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
||||
void Arm64RegCache::Init(ARM64XEmitter* emitter)
|
||||
void Arm64RegCache::Init(JitArm64* jit)
|
||||
{
|
||||
m_emit = emitter;
|
||||
m_jit = jit;
|
||||
m_emit = jit;
|
||||
m_float_emit.reset(new ARM64FloatEmitter(m_emit));
|
||||
GetAllocationOrder();
|
||||
}
|
||||
@ -467,7 +468,10 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
|
||||
return host_reg;
|
||||
|
||||
// Else convert this register back to doubles.
|
||||
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
const ARM64Reg tmp_reg = GetReg();
|
||||
m_jit->ConvertSingleToDoublePair(preg, host_reg, host_reg, tmp_reg);
|
||||
UnlockRegister(tmp_reg);
|
||||
|
||||
reg.Load(host_reg, RegType::Register);
|
||||
[[fallthrough]];
|
||||
}
|
||||
@ -482,7 +486,10 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
|
||||
return host_reg;
|
||||
|
||||
// Else convert this register back to a double.
|
||||
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
const ARM64Reg tmp_reg = GetReg();
|
||||
m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg);
|
||||
UnlockRegister(tmp_reg);
|
||||
|
||||
reg.Load(host_reg, RegType::LowerPair);
|
||||
[[fallthrough]];
|
||||
}
|
||||
@ -516,7 +523,10 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
|
||||
return host_reg;
|
||||
}
|
||||
|
||||
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
const ARM64Reg tmp_reg = GetReg();
|
||||
m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg);
|
||||
UnlockRegister(tmp_reg);
|
||||
|
||||
reg.Load(host_reg, RegType::Duplicated);
|
||||
[[fallthrough]];
|
||||
}
|
||||
@ -584,7 +594,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
|
||||
if ((type == RegType::LowerPair || type == RegType::LowerPairSingle) && was_dirty)
|
||||
{
|
||||
// We must *not* change host_reg as this register might still be in use. So it's fine to
|
||||
// store this register, but it's *not* fine to convert it to double. So for double convertion,
|
||||
// store this register, but it's *not* fine to convert it to double. So for double conversion,
|
||||
// a temporary register needs to be used.
|
||||
ARM64Reg host_reg = reg.GetReg();
|
||||
ARM64Reg flush_reg = host_reg;
|
||||
@ -592,9 +602,27 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
|
||||
switch (reg.GetType())
|
||||
{
|
||||
case RegType::Single:
|
||||
// For a store-safe register, conversion is just one instruction regardless of whether
|
||||
// we're converting a pair, so ConvertSingleToDoublePair followed by a
|
||||
// 128-bit store is faster than INS followed by ConvertSingleToDoubleLower and a
|
||||
// 64-bit store. But for registers which are not store-safe, the latter is better.
|
||||
flush_reg = GetReg();
|
||||
m_float_emit->FCVTL(64, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
|
||||
[[fallthrough]];
|
||||
if (!m_jit->IsFPRStoreSafe(preg))
|
||||
{
|
||||
ARM64Reg scratch_reg = GetReg();
|
||||
m_float_emit->INS(32, flush_reg, 0, host_reg, 1);
|
||||
m_jit->ConvertSingleToDoubleLower(preg, flush_reg, flush_reg, scratch_reg);
|
||||
m_float_emit->STR(64, IndexType::Unsigned, flush_reg, PPC_REG, u32(PPCSTATE_OFF_PS1(preg)));
|
||||
Unlock(scratch_reg);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_jit->ConvertSingleToDoublePair(preg, flush_reg, host_reg, flush_reg);
|
||||
m_float_emit->STR(128, IndexType::Unsigned, flush_reg, PPC_REG,
|
||||
u32(PPCSTATE_OFF_PS0(preg)));
|
||||
}
|
||||
break;
|
||||
case RegType::Register:
|
||||
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit
|
||||
// store.
|
||||
@ -604,7 +632,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
|
||||
break;
|
||||
case RegType::DuplicatedSingle:
|
||||
flush_reg = GetReg();
|
||||
m_float_emit->FCVT(64, 32, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
|
||||
m_jit->ConvertSingleToDoubleLower(preg, flush_reg, host_reg, flush_reg);
|
||||
[[fallthrough]];
|
||||
case RegType::Duplicated:
|
||||
// Store PSR1 (which is equal to PSR0) in memory.
|
||||
@ -708,17 +736,20 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
|
||||
const bool dirty = reg.IsDirty();
|
||||
RegType type = reg.GetType();
|
||||
|
||||
// If FlushRegister calls GetReg with all registers locked, we can get infinite recursion
|
||||
const ARM64Reg tmp_reg = GetUnlockedRegisterCount() > 0 ? GetReg() : ARM64Reg::INVALID_REG;
|
||||
|
||||
// If we're in single mode, just convert it back to a double.
|
||||
if (type == RegType::Single)
|
||||
{
|
||||
if (dirty)
|
||||
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
m_jit->ConvertSingleToDoublePair(preg, host_reg, host_reg, tmp_reg);
|
||||
type = RegType::Register;
|
||||
}
|
||||
if (type == RegType::DuplicatedSingle || type == RegType::LowerPairSingle)
|
||||
{
|
||||
if (dirty)
|
||||
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
m_jit->ConvertSingleToDoubleLower(preg, host_reg, host_reg, tmp_reg);
|
||||
|
||||
if (type == RegType::DuplicatedSingle)
|
||||
type = RegType::Duplicated;
|
||||
@ -770,6 +801,9 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
|
||||
reg.Flush();
|
||||
}
|
||||
}
|
||||
|
||||
if (tmp_reg != ARM64Reg::INVALID_REG)
|
||||
UnlockRegister(tmp_reg);
|
||||
}
|
||||
|
||||
void Arm64FPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
|
||||
@ -806,7 +840,7 @@ void Arm64FPRCache::FixSinglePrecision(size_t preg)
|
||||
m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
reg.Load(host_reg, RegType::DuplicatedSingle);
|
||||
break;
|
||||
case RegType::Register: // PS0 and PS1 needs to be converted
|
||||
case RegType::Register: // PS0 and PS1 need to be converted
|
||||
m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
reg.Load(host_reg, RegType::Single);
|
||||
break;
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
class JitArm64;
|
||||
|
||||
// Dedicated host registers
|
||||
|
||||
// memory base register
|
||||
@ -150,7 +152,7 @@ public:
|
||||
explicit Arm64RegCache(size_t guest_reg_count) : m_guest_registers(guest_reg_count) {}
|
||||
virtual ~Arm64RegCache() = default;
|
||||
|
||||
void Init(Arm64Gen::ARM64XEmitter* emitter);
|
||||
void Init(JitArm64* jit);
|
||||
|
||||
virtual void Start(PPCAnalyst::BlockRegStats& stats) {}
|
||||
void DiscardRegisters(BitSet32 regs);
|
||||
@ -166,6 +168,9 @@ public:
|
||||
|
||||
void UpdateLastUsed(BitSet32 regs_used);
|
||||
|
||||
// Get available host registers
|
||||
u32 GetUnlockedRegisterCount() const;
|
||||
|
||||
// Locks a register so a cache cannot use it
|
||||
// Useful for function calls
|
||||
template <typename T = Arm64Gen::ARM64Reg, typename... Args>
|
||||
@ -209,15 +214,14 @@ protected:
|
||||
void DiscardRegister(size_t preg);
|
||||
virtual void FlushRegister(size_t preg, bool maintain_state) = 0;
|
||||
|
||||
// Get available host registers
|
||||
u32 GetUnlockedRegisterCount() const;
|
||||
|
||||
// Calls IncrementLastUsed() on every entry of m_guest_registers.
void IncrementAllUsed()
|
||||
{
|
||||
// Iterate by reference so the stored entries themselves are updated, not copies.
for (auto& reg : m_guest_registers)
|
||||
reg.IncrementLastUsed();
|
||||
}
|
||||
|
||||
JitArm64* m_jit = nullptr;
|
||||
|
||||
// Code emitter
|
||||
Arm64Gen::ARM64XEmitter* m_emit = nullptr;
|
||||
|
||||
|
@ -194,6 +194,85 @@ void JitArm64::GenerateAsm()
|
||||
}
|
||||
|
||||
void JitArm64::GenerateCommonAsm()
|
||||
{
|
||||
GetAsmRoutines()->cdts = GetCodePtr();
|
||||
GenerateConvertDoubleToSingle();
|
||||
JitRegister::Register(GetAsmRoutines()->cdts, GetCodePtr(), "JIT_cdts");
|
||||
|
||||
GetAsmRoutines()->cstd = GetCodePtr();
|
||||
GenerateConvertSingleToDouble();
|
||||
JitRegister::Register(GetAsmRoutines()->cdts, GetCodePtr(), "JIT_cstd");
|
||||
|
||||
GenerateQuantizedLoadStores();
|
||||
}
|
||||
|
||||
// Emits a routine that turns the bit pattern of a double (in X0) into the bit pattern of a
// single (in W1) using only integer instructions. Inputs whose biased exponent field lies in
// [874, 896] take the "denormal" path; every other input takes the fall-through path.
// Input in X0, output in W1, clobbers X0-X3 and flags.
|
||||
void JitArm64::GenerateConvertDoubleToSingle()
|
||||
{
|
||||
// X2 = bits 52-62 of the input, i.e. the 11-bit biased exponent field of a double.
UBFX(ARM64Reg::X2, ARM64Reg::X0, 52, 11);
|
||||
// W3 = exponent - 874. Comparing that as an unsigned value against 22 (896 - 874) is a
// single range check: "lower or same" holds exactly when 874 <= exponent <= 896.
SUB(ARM64Reg::W3, ARM64Reg::W2, 874);
|
||||
CMP(ARM64Reg::W3, 896 - 874);
|
||||
// X1 = upper 32 bits of the input (sign, exponent and the top of the fraction). This value
// is needed on both paths, so it is computed before the branch.
LSR(ARM64Reg::X1, ARM64Reg::X0, 32);
|
||||
FixupBranch denormal = B(CCFlags::CC_LS);
|
||||
|
||||
// Fall-through path: keep bits 31-30 of the upper half (sign and top exponent bit) ...
ANDI2R(ARM64Reg::X1, ARM64Reg::X1, 0xc0000000);
|
||||
// ... and copy bits 29-58 of the input into bits 0-29 of the result.
BFXIL(ARM64Reg::X1, ARM64Reg::X0, 29, 30);
|
||||
RET();
|
||||
|
||||
SetJumpTarget(denormal);
|
||||
// W3 = bits 21-52 of the input: the top 31 fraction bits, with exponent bit 0 in bit 31.
LSR(ARM64Reg::X3, ARM64Reg::X0, 21);
|
||||
// X0 is reused as a scratch register from here on (the input is no longer needed in X0).
MOVZ(ARM64Reg::X0, 905);
|
||||
// Force bit 31 to 1 - presumably the implicit leading one of the significand made explicit.
ORRI2R(ARM64Reg::W3, ARM64Reg::W3, 0x80000000);
|
||||
// W2 = 905 - exponent, the right-shift amount (9..31 for exponents in [874, 896]).
SUB(ARM64Reg::W2, ARM64Reg::W0, ARM64Reg::W2);
|
||||
LSRV(ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W2);
|
||||
// Take the sign bit from the upper half saved in X1 and merge it with the shifted value.
ANDI2R(ARM64Reg::X3, ARM64Reg::X1, 0x80000000);
|
||||
ORR(ARM64Reg::X1, ARM64Reg::X3, ARM64Reg::X2);
|
||||
RET();
|
||||
}
|
||||
|
||||
// Emits a routine that turns the bit pattern of a single (in W0) into the bit pattern of a
// double (in X0) using only integer instructions. Three cases are handled separately:
// zero (exponent and fraction both 0), denormal (exponent 0, fraction non-zero), and
// everything with a non-zero exponent field (normal numbers, infinities and NaNs).
// Input in W0, output in X0, clobbers X0-X4 and flags.
|
||||
void JitArm64::GenerateConvertSingleToDouble()
|
||||
{
|
||||
// W1 = bits 23-30 of the input, i.e. the 8-bit biased exponent field of a single.
UBFX(ARM64Reg::W1, ARM64Reg::W0, 23, 8);
|
||||
FixupBranch normal_or_nan = CBNZ(ARM64Reg::W1);
|
||||
|
||||
// Exponent field is 0: W1 = the 23 fraction bits, which decide between zero and denormal.
ANDI2R(ARM64Reg::W1, ARM64Reg::W0, 0x007fffff);
|
||||
FixupBranch denormal = CBNZ(ARM64Reg::W1);
|
||||
|
||||
// Zero
|
||||
// Only the sign bit can be set here, so shifting left by 32 moves it from bit 31 to bit 63.
LSL(ARM64Reg::X0, ARM64Reg::X0, 32);
|
||||
RET();
|
||||
|
||||
SetJumpTarget(denormal);
|
||||
// X2 = sign bit, moved to bit 63 two instructions below.
ANDI2R(ARM64Reg::W2, ARM64Reg::W0, 0x80000000);
|
||||
// X3 = number of leading zero bits of the fraction, counted over all 64 bits of X1.
CLZ(ARM64Reg::X3, ARM64Reg::X1);
|
||||
LSL(ARM64Reg::X2, ARM64Reg::X2, 32);
|
||||
// X4 = clz with bits 6-63 set, which equals clz - 64 in two's complement for clz < 64.
ORRI2R(ARM64Reg::X4, ARM64Reg::X3, 0xffffffffffffffc0);
|
||||
// Subtract clz from the exponent field (bits 52 and up); the constant added at the end
// turns this into the final exponent 0x3a9 - clz.
SUB(ARM64Reg::X2, ARM64Reg::X2, ARM64Reg::X3, ArithOption(ARM64Reg::X3, ShiftType::LSL, 52));
|
||||
// X3 = (clz - 64) + 23 = clz - 41: shifting the fraction left by this amount puts its
// highest set bit at bit 22.
ADD(ARM64Reg::X3, ARM64Reg::X4, 23);
|
||||
LSLV(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::X3);
|
||||
// Insert bits 0-21 of the shifted fraction at bits 30-51 of the result; the leading one at
// bit 22 is not copied.
BFI(ARM64Reg::X2, ARM64Reg::X1, 30, 22);
|
||||
MOVI2R(ARM64Reg::X1, 0x3a90000000000000);
|
||||
ADD(ARM64Reg::X0, ARM64Reg::X2, ARM64Reg::X1);
|
||||
RET();
|
||||
|
||||
SetJumpTarget(normal_or_nan);
|
||||
// The flags from this compare are consumed by the CSET below: W4 = 1 unless the exponent
// field is 0xff (infinity or NaN).
CMP(ARM64Reg::W1, 0xff);
|
||||
// W2 = bit 30 of the input (the top exponent bit), still in place.
ANDI2R(ARM64Reg::W2, ARM64Reg::W0, 0x40000000);
|
||||
CSET(ARM64Reg::W4, CCFlags::CC_NEQ);
|
||||
// W3 = sign bit and top exponent bit; they become bits 63-62 of the result.
ANDI2R(ARM64Reg::W3, ARM64Reg::W0, 0xc0000000);
|
||||
// W2 = (exponent != 0xff) XOR (bit 30). Non-zero means bits 59-61 of the result get set.
EOR(ARM64Reg::W2, ARM64Reg::W4, ARM64Reg::W2, ArithOption(ARM64Reg::W2, ShiftType::LSR, 30));
|
||||
MOVI2R(ARM64Reg::X1, 0x3800000000000000);
|
||||
// W4 = low 30 bits of the input (remaining exponent bits and the fraction).
ANDI2R(ARM64Reg::W4, ARM64Reg::W0, 0x3fffffff);
|
||||
LSL(ARM64Reg::X3, ARM64Reg::X3, 32);
|
||||
CMP(ARM64Reg::W2, 0);
|
||||
// X1 = 0x3800000000000000 if W2 != 0, otherwise 0.
CSEL(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::ZR, CCFlags::CC_NEQ);
|
||||
// Place the low 30 input bits at bits 29-58 of the result.
BFI(ARM64Reg::X3, ARM64Reg::X4, 29, 30);
|
||||
ORR(ARM64Reg::X0, ARM64Reg::X3, ARM64Reg::X1);
|
||||
RET();
|
||||
}
|
||||
|
||||
void JitArm64::GenerateQuantizedLoadStores()
|
||||
{
|
||||
// X0 is the scale
|
||||
// X1 is address
|
||||
@ -654,6 +733,4 @@ void JitArm64::GenerateCommonAsm()
|
||||
paired_store_quantized[29] = storeSingleU16Slow;
|
||||
paired_store_quantized[30] = storeSingleS8Slow;
|
||||
paired_store_quantized[31] = storeSingleS16Slow;
|
||||
|
||||
GetAsmRoutines()->mfcr = nullptr;
|
||||
}
|
||||
|
@ -16,14 +16,11 @@ struct BackPatchInfo
|
||||
FLAG_SIZE_32 = (1 << 4),
|
||||
FLAG_SIZE_F32 = (1 << 5),
|
||||
FLAG_SIZE_F32X2 = (1 << 6),
|
||||
FLAG_SIZE_F32X2I = (1 << 7),
|
||||
FLAG_SIZE_F64 = (1 << 8),
|
||||
FLAG_REVERSE = (1 << 9),
|
||||
FLAG_EXTEND = (1 << 10),
|
||||
FLAG_SIZE_F32I = (1 << 11),
|
||||
FLAG_ZERO_256 = (1 << 12),
|
||||
FLAG_MASK_FLOAT =
|
||||
FLAG_SIZE_F32 | FLAG_SIZE_F32X2 | FLAG_SIZE_F32X2I | FLAG_SIZE_F64 | FLAG_SIZE_F32I,
|
||||
FLAG_SIZE_F64 = (1 << 7),
|
||||
FLAG_REVERSE = (1 << 8),
|
||||
FLAG_EXTEND = (1 << 9),
|
||||
FLAG_ZERO_256 = (1 << 10),
|
||||
FLAG_MASK_FLOAT = FLAG_SIZE_F32 | FLAG_SIZE_F32X2 | FLAG_SIZE_F64,
|
||||
};
|
||||
|
||||
static u32 GetFlagSize(u32 flags)
|
||||
@ -34,8 +31,10 @@ struct BackPatchInfo
|
||||
return 16;
|
||||
if (flags & FLAG_SIZE_32)
|
||||
return 32;
|
||||
if (flags & FLAG_SIZE_F32 || flags & FLAG_SIZE_F32I)
|
||||
if (flags & FLAG_SIZE_F32)
|
||||
return 32;
|
||||
if (flags & FLAG_SIZE_F32X2)
|
||||
return 64;
|
||||
if (flags & FLAG_SIZE_F64)
|
||||
return 64;
|
||||
if (flags & FLAG_ZERO_256)
|
||||
|
@ -26,6 +26,7 @@ struct CommonAsmRoutinesBase
|
||||
const u8* fres;
|
||||
const u8* mfcr;
|
||||
const u8* cdts;
|
||||
const u8* cstd;
|
||||
|
||||
// In: array index: GQR to use.
|
||||
// In: ECX: Address to read from.
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/ConfigManager.h"
|
||||
@ -98,6 +99,7 @@ protected:
|
||||
PPCAnalyst::BlockRegStats gpa;
|
||||
PPCAnalyst::BlockRegStats fpa;
|
||||
PPCAnalyst::CodeOp* op;
|
||||
BitSet32 fpr_is_store_safe;
|
||||
|
||||
JitBlock* curBlock;
|
||||
|
||||
|
@ -976,7 +976,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||
|
||||
op.fprIsSingle = fprIsSingle;
|
||||
op.fprIsDuplicated = fprIsDuplicated;
|
||||
op.fprIsStoreSafe = fprIsStoreSafe;
|
||||
op.fprIsStoreSafeBeforeInst = fprIsStoreSafe;
|
||||
if (op.fregOut >= 0)
|
||||
{
|
||||
if (op.opinfo->type == OpType::SingleFP)
|
||||
@ -1036,6 +1036,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||
(op.opinfo->type == OpType::SingleFP || op.opinfo->type == OpType::PS);
|
||||
}
|
||||
}
|
||||
op.fprIsStoreSafeAfterInst = fprIsStoreSafe;
|
||||
|
||||
if (op.opinfo->type == OpType::StorePS || op.opinfo->type == OpType::LoadPS)
|
||||
{
|
||||
|
@ -66,7 +66,8 @@ struct CodeOp // 16B
|
||||
// convert between single and double formats by just using the host machine's instruction for it.
|
||||
// (The reason why we can't always do this is because some games rely on the exact bits of
|
||||
// denormals and SNaNs being preserved as long as no arithmetic operation is performed on them.)
|
||||
BitSet32 fprIsStoreSafe;
|
||||
BitSet32 fprIsStoreSafeBeforeInst;
|
||||
BitSet32 fprIsStoreSafeAfterInst;
|
||||
|
||||
BitSet32 GetFregsOut() const
|
||||
{
|
||||
|
@ -13,7 +13,7 @@
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Common\Arm64Emitter.cpp" />
|
||||
<ClCompile Include="Common\ArmCPUDetect.cpp" />
|
||||
<ClCompile Include="Common\GenericFPURoundMode.cpp" />
|
||||
<ClCompile Include="Common\ArmFPURoundMode.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\JitArm64\Jit_Util.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\JitArm64\Jit.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\JitArm64\JitArm64_BackPatch.cpp" />
|
||||
|
@ -21,6 +21,7 @@ if(_M_X86)
|
||||
)
|
||||
elseif(_M_ARM_64)
|
||||
add_dolphin_test(PowerPCTest
|
||||
PowerPC/JitArm64/ConvertSingleDouble.cpp
|
||||
PowerPC/JitArm64/MovI2R.cpp
|
||||
)
|
||||
endif()
|
||||
|
273
Source/UnitTests/Core/PowerPC/JitArm64/ConvertSingleDouble.cpp
Normal file
273
Source/UnitTests/Core/PowerPC/JitArm64/ConvertSingleDouble.cpp
Normal file
@ -0,0 +1,273 @@
|
||||
// Copyright 2021 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#include "Common/BitUtils.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FPURoundMode.h"
|
||||
#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h"
|
||||
#include "Core/PowerPC/JitArm64/Jit.h"
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace Arm64Gen;
|
||||
|
||||
// Plain aggregate of two values of the same type. Used as the return type of the generated
// pair-conversion functions instead of std::tuple, whose return ABI is awkward to rely on.
template <class T>
struct Pair
{
  T value1;
  T value2;
};
|
||||
|
||||
class TestConversion : private JitArm64
|
||||
{
|
||||
public:
|
||||
TestConversion()
|
||||
{
|
||||
AllocCodeSpace(4096);
|
||||
AddChildCodeSpace(&farcode, 2048);
|
||||
|
||||
gpr.Init(this);
|
||||
fpr.Init(this);
|
||||
|
||||
js.fpr_is_store_safe = BitSet32(0);
|
||||
|
||||
GetAsmRoutines()->cdts = GetCodePtr();
|
||||
GenerateConvertDoubleToSingle();
|
||||
GetAsmRoutines()->cstd = GetCodePtr();
|
||||
GenerateConvertSingleToDouble();
|
||||
|
||||
gpr.Lock(ARM64Reg::W30);
|
||||
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
|
||||
|
||||
convert_single_to_double_lower = Common::BitCast<u64 (*)(u32)>(GetCodePtr());
|
||||
m_float_emit.INS(32, ARM64Reg::S0, 0, ARM64Reg::W0);
|
||||
ConvertSingleToDoubleLower(0, ARM64Reg::D0, ARM64Reg::S0, ARM64Reg::Q1);
|
||||
m_float_emit.UMOV(64, ARM64Reg::X0, ARM64Reg::D0, 0);
|
||||
RET();
|
||||
|
||||
convert_single_to_double_pair = Common::BitCast<Pair<u64> (*)(u32, u32)>(GetCodePtr());
|
||||
m_float_emit.INS(32, ARM64Reg::D0, 0, ARM64Reg::W0);
|
||||
m_float_emit.INS(32, ARM64Reg::D0, 1, ARM64Reg::W1);
|
||||
ConvertSingleToDoublePair(0, ARM64Reg::Q0, ARM64Reg::D0, ARM64Reg::Q1);
|
||||
m_float_emit.UMOV(64, ARM64Reg::X0, ARM64Reg::Q0, 0);
|
||||
m_float_emit.UMOV(64, ARM64Reg::X1, ARM64Reg::Q0, 1);
|
||||
RET();
|
||||
|
||||
convert_double_to_single_lower = Common::BitCast<u32 (*)(u64)>(GetCodePtr());
|
||||
m_float_emit.INS(64, ARM64Reg::D0, 0, ARM64Reg::X0);
|
||||
ConvertDoubleToSingleLower(0, ARM64Reg::S0, ARM64Reg::D0);
|
||||
m_float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::S0, 0);
|
||||
RET();
|
||||
|
||||
convert_double_to_single_pair = Common::BitCast<Pair<u32> (*)(u64, u64)>(GetCodePtr());
|
||||
m_float_emit.INS(64, ARM64Reg::Q0, 0, ARM64Reg::X0);
|
||||
m_float_emit.INS(64, ARM64Reg::Q0, 1, ARM64Reg::X1);
|
||||
ConvertDoubleToSinglePair(0, ARM64Reg::D0, ARM64Reg::Q0);
|
||||
m_float_emit.UMOV(64, ARM64Reg::X0, ARM64Reg::D0, 0);
|
||||
RET();
|
||||
|
||||
gpr.Unlock(ARM64Reg::W30);
|
||||
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
|
||||
|
||||
FlushIcache();
|
||||
|
||||
// Set the rounding mode to something that's as annoying as possible to handle
|
||||
// (flush-to-zero enabled, and rounding not symmetric about the origin)
|
||||
FPURoundMode::SetSIMDMode(FPURoundMode::RoundMode::ROUND_UP, true);
|
||||
}
|
||||
|
||||
~TestConversion() override
|
||||
{
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
|
||||
FreeCodeSpace();
|
||||
}
|
||||
|
||||
u64 ConvertSingleToDouble(u32 value) { return convert_single_to_double_lower(value); }
|
||||
|
||||
Pair<u64> ConvertSingleToDouble(u32 value1, u32 value2)
|
||||
{
|
||||
return convert_single_to_double_pair(value1, value2);
|
||||
}
|
||||
|
||||
u32 ConvertDoubleToSingle(u64 value) { return convert_double_to_single_lower(value); }
|
||||
|
||||
Pair<u32> ConvertDoubleToSingle(u64 value1, u64 value2)
|
||||
{
|
||||
return convert_double_to_single_pair(value1, value2);
|
||||
}
|
||||
|
||||
private:
|
||||
std::function<u64(u32)> convert_single_to_double_lower;
|
||||
std::function<Pair<u64>(u32, u32)> convert_single_to_double_pair;
|
||||
std::function<u32(u64)> convert_double_to_single_lower;
|
||||
std::function<Pair<u32>(u64, u64)> convert_double_to_single_pair;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Compares the JIT's double-to-single conversion (scalar and paired) against the
// ConvertToSingle reference for special values and for bit patterns around the exponent
// thresholds 874 and 896.
TEST(JitArm64, ConvertDoubleToSingle)
{
  TestConversion converter;

  const std::vector<u64> input_values{
      // Special values
      0x0000'0000'0000'0000,  // +0
      0x0000'0000'0000'0001,  // smallest positive denormal
      0x0000'0000'0100'0000,
      0x000F'FFFF'FFFF'FFFF,  // largest positive denormal
      0x0010'0000'0000'0000,  // smallest positive normal
      0x0010'0000'0000'0002,
      0x3FF0'0000'0000'0000,  // 1.0
      0x7FEF'FFFF'FFFF'FFFF,  // largest positive normal
      0x7FF0'0000'0000'0000,  // +infinity
      0x7FF0'0000'0000'0001,  // first positive SNaN
      0x7FF7'FFFF'FFFF'FFFF,  // last positive SNaN
      0x7FF8'0000'0000'0000,  // first positive QNaN
      0x7FFF'FFFF'FFFF'FFFF,  // last positive QNaN
      0x8000'0000'0000'0000,  // -0
      0x8000'0000'0000'0001,  // smallest negative denormal
      0x8000'0000'0100'0000,
      0x800F'FFFF'FFFF'FFFF,  // largest negative denormal
      0x8010'0000'0000'0000,  // smallest negative normal
      0x8010'0000'0000'0002,
      0xBFF0'0000'0000'0000,  // -1.0
      0xFFEF'FFFF'FFFF'FFFF,  // largest negative normal
      0xFFF0'0000'0000'0000,  // -infinity
      0xFFF0'0000'0000'0001,  // first negative SNaN
      0xFFF7'FFFF'FFFF'FFFF,  // last negative SNaN
      0xFFF8'0000'0000'0000,  // first negative QNaN
      0xFFFF'FFFF'FFFF'FFFF,  // last negative QNaN

      // Around the (exp > 896) threshold
      0x3800'0000'0000'0000,  // 2^(-127): a denormal in single precision
      0x3810'0000'0000'0000,  // 2^(-126): the smallest single-precision normal
      0xB800'0000'0000'0000,  // -2^(-127): a denormal in single precision
      0xB810'0000'0000'0000,  // -2^(-126): the smallest single-precision normal
      0x3800'1234'5678'9ABC, 0x3810'1234'5678'9ABC, 0xB800'1234'5678'9ABC, 0xB810'1234'5678'9ABC,

      // Around the (exp >= 874) threshold
      0x3680'0000'0000'0000,  // 2^(-151): not representable in single precision
      0x36A0'0000'0000'0000,  // 2^(-149): the smallest single-precision denormal
      0x36B0'0000'0000'0000,  // 2^(-148): a single-precision denormal
      0xB680'0000'0000'0000,  // -2^(-151): not representable in single precision
      0xB6A0'0000'0000'0000,  // -2^(-149): the smallest single-precision denormal
      0xB6B0'0000'0000'0000,  // -2^(-148): a single-precision denormal
      0x3680'1234'5678'9ABC, 0x36A0'1234'5678'9ABC, 0x36B0'1234'5678'9ABC, 0xB680'1234'5678'9ABC,
      0xB6A0'1234'5678'9ABC, 0xB6B0'1234'5678'9ABC,

      // Ordinary values
      0x3FF8'0000'0000'0000,  // 1.5
      0x408F'4000'0000'0000,  // 1000
      0xC008'0000'0000'0000,  // -3
  };

  // Scalar variant: every input on its own.
  for (const u64 bits : input_values)
  {
    const u32 expected = ConvertToSingle(bits);
    const u32 actual = converter.ConvertDoubleToSingle(bits);

    // Print the raw bit patterns as well; they are easier to read than gtest's decimal output.
    if (!(expected == actual))
      fmt::print("{:016x} -> {:08x} == {:08x}\n", bits, actual, expected);

    EXPECT_EQ(expected, actual);
  }

  // Paired variant: every ordered combination of two inputs.
  for (const u64 first : input_values)
  {
    for (const u64 second : input_values)
    {
      const u32 expected1 = ConvertToSingle(first);
      const u32 expected2 = ConvertToSingle(second);
      const Pair<u32> result = converter.ConvertDoubleToSingle(first, second);
      const u32 actual1 = result.value1;
      const u32 actual2 = result.value2;

      if (!(expected1 == actual1 && expected2 == actual2))
      {
        fmt::print("{:016x} -> {:08x} == {:08x},\n", first, actual1, expected1);
        fmt::print("{:016x} -> {:08x} == {:08x}\n", second, actual2, expected2);
      }

      EXPECT_EQ(expected1, actual1);
      EXPECT_EQ(expected2, actual2);
    }
  }
}
|
||||
|
||||
// Compares the JIT's single-to-double conversion (scalar and paired) against the
// ConvertToDouble reference for special values (zeros, denormals, infinities, NaNs) and a few
// ordinary numbers. All inputs and outputs are raw bit patterns.
TEST(JitArm64, ConvertSingleToDouble)
{
  TestConversion test;

  const std::vector<u32> input_values{
      // Special values
      0x0000'0000,  // positive zero
      0x0000'0001,  // smallest positive denormal
      0x0000'1000,
      0x007F'FFFF,  // largest positive denormal
      0x0080'0000,  // smallest positive normal
      0x0080'0002,
      0x3F80'0000,  // 1.0
      0x7F7F'FFFF,  // largest positive normal
      0x7F80'0000,  // positive infinity
      0x7F80'0001,  // first positive SNaN
      0x7FBF'FFFF,  // last positive SNaN
      0x7FC0'0000,  // first positive QNaN
      0x7FFF'FFFF,  // last positive QNaN
      0x8000'0000,  // negative zero
      0x8000'0001,  // smallest negative denormal
      0x8000'1000,
      0x807F'FFFF,  // largest negative denormal
      0x8080'0000,  // smallest negative normal
      0x8080'0002,
      // -1.0 in binary32 is sign=1, exponent=0x7F, fraction=0, i.e. 0xBF80'0000.
      // (0xBFF0'0000 is the upper half of the *double* encoding of -1.0 and decodes to -1.875
      // as a single.)
      0xBF80'0000,  // -1.0
      0xFF7F'FFFF,  // largest negative normal
      0xFF80'0000,  // negative infinity
      0xFF80'0001,  // first negative SNaN
      0xFFBF'FFFF,  // last negative SNaN
      0xFFC0'0000,  // first negative QNaN
      0xFFFF'FFFF,  // last negative QNaN

      // Some typical numbers
      0x3FC0'0000,  // 1.5
      0x447A'0000,  // 1000
      0xC040'0000,  // -3
  };

  // Scalar variant: every input on its own.
  for (const u32 input : input_values)
  {
    const u64 expected = ConvertToDouble(input);
    const u64 actual = test.ConvertSingleToDouble(input);

    // Print the raw bit patterns as well; they are easier to read than gtest's decimal output.
    if (expected != actual)
      fmt::print("{:08x} -> {:016x} == {:016x}\n", input, actual, expected);

    EXPECT_EQ(expected, actual);
  }

  // Paired variant: every ordered combination of two inputs.
  for (const u32 input1 : input_values)
  {
    for (const u32 input2 : input_values)
    {
      const u64 expected1 = ConvertToDouble(input1);
      const u64 expected2 = ConvertToDouble(input2);
      const auto [actual1, actual2] = test.ConvertSingleToDouble(input1, input2);

      if (expected1 != actual1 || expected2 != actual2)
      {
        fmt::print("{:08x} -> {:016x} == {:016x},\n", input1, actual1, expected1);
        fmt::print("{:08x} -> {:016x} == {:016x}\n", input2, actual2, expected2);
      }

      EXPECT_EQ(expected1, actual1);
      EXPECT_EQ(expected2, actual2);
    }
  }
}
|
@ -81,6 +81,7 @@
|
||||
<ClCompile Include="Core\PowerPC\Jit64Common\Frsqrte.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup Condition="'$(Platform)'=='ARM64'">
|
||||
<ClCompile Include="Core\PowerPC\JitArm64\ConvertSingleDouble.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\JitArm64\MovI2R.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
Loading…
x
Reference in New Issue
Block a user