From 28e4869c432e9f2ebaf90ec1e27aca16bea2dd21 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 24 Jan 2021 20:18:43 +0100 Subject: [PATCH] JitArm64: Optimize ConvertDoubleToSingle --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 ++ .../JitArm64/JitArm64_FloatingPoint.cpp | 16 ++++----- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 35 +++++++++++++++++-- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 4f8ea466f4..9d98910660 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -220,6 +220,8 @@ private: // AsmRoutines void GenerateAsm(); void GenerateCommonAsm(); + void GenerateConvertDoubleToSingle(); + void GenerateQuantizedLoadStores(); // Profiling void BeginTimeProfile(JitBlock* b); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 4a4830ef63..50c9d1f85c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -397,12 +397,12 @@ void JitArm64::ConvertDoubleToSingleLower(ARM64Reg dest_reg, ARM64Reg src_reg) { FlushCarry(); - const BitSet32 gpr_saved = gpr.GetCallerSavedUsed(); + const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30}; ABI_PushRegisters(gpr_saved); m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 0); - QuickCallFunction(ARM64Reg::X1, &ConvertToSingle); - m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W0); + BL(cdts); + m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W1); ABI_PopRegisters(gpr_saved); } @@ -411,16 +411,16 @@ void JitArm64::ConvertDoubleToSinglePair(ARM64Reg dest_reg, ARM64Reg src_reg) { FlushCarry(); - const BitSet32 gpr_saved = gpr.GetCallerSavedUsed(); + const BitSet32 gpr_saved = gpr.GetCallerSavedUsed() & BitSet32{0, 1, 2, 3, 30}; ABI_PushRegisters(gpr_saved); m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 0); - QuickCallFunction(ARM64Reg::X1, &ConvertToSingle); - m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W0); + BL(cdts); + m_float_emit.INS(32, dest_reg, 0, ARM64Reg::W1); m_float_emit.UMOV(64, ARM64Reg::X0, src_reg, 1); - QuickCallFunction(ARM64Reg::X1, &ConvertToSingle); - m_float_emit.INS(32, dest_reg, 1, ARM64Reg::W0); + BL(cdts); + m_float_emit.INS(32, dest_reg, 1, ARM64Reg::W1); ABI_PopRegisters(gpr_saved); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 3b426a6214..c686c31ce4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -194,6 +194,39 @@ void JitArm64::GenerateAsm() } void JitArm64::GenerateCommonAsm() +{ + GetAsmRoutines()->cdts = GetCodePtr(); + GenerateConvertDoubleToSingle(); + JitRegister::Register(GetAsmRoutines()->cdts, GetCodePtr(), "JIT_cdts"); + + GenerateQuantizedLoadStores(); +} + +// Input in X0, output in W1, clobbers X0-X3 and flags. +void JitArm64::GenerateConvertDoubleToSingle() +{ + UBFX(ARM64Reg::X2, ARM64Reg::X0, 52, 11); + SUB(ARM64Reg::W3, ARM64Reg::W2, 874); + CMP(ARM64Reg::W3, 896 - 874); + LSR(ARM64Reg::X1, ARM64Reg::X0, 32); + FixupBranch denormal = B(CCFlags::CC_LS); + + ANDI2R(ARM64Reg::X1, ARM64Reg::X1, 0xc0000000); + BFXIL(ARM64Reg::X1, ARM64Reg::X0, 29, 30); + RET(); + + SetJumpTarget(denormal); + LSR(ARM64Reg::X3, ARM64Reg::X0, 21); + MOVZ(ARM64Reg::X0, 905); + ORRI2R(ARM64Reg::W3, ARM64Reg::W3, 0x80000000); + SUB(ARM64Reg::W2, ARM64Reg::W0, ARM64Reg::W2); + LSRV(ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::W2); + ANDI2R(ARM64Reg::X3, ARM64Reg::X1, 0x80000000); + ORR(ARM64Reg::X1, ARM64Reg::X3, ARM64Reg::X2); + RET(); +} + +void JitArm64::GenerateQuantizedLoadStores() { // X0 is the scale // X1 is address @@ -654,6 +687,4 @@ void JitArm64::GenerateCommonAsm() paired_store_quantized[29] = storeSingleU16Slow; paired_store_quantized[30] = storeSingleS8Slow; paired_store_quantized[31] = storeSingleS16Slow; - - GetAsmRoutines()->mfcr = nullptr; }