From f7ed979e30068814eea8e4eb808ceccb80f37576 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 11 Apr 2017 17:01:29 +0100 Subject: [PATCH] Jit64AsmCommon: Make frsqrte and fres PIE-compliant --- Source/Core/Common/MathUtil.cpp | 48 +++++++++---------- Source/Core/Common/MathUtil.h | 12 +++-- .../PowerPC/Jit64Common/Jit64AsmCommon.cpp | 24 ++++++++-- 3 files changed, 51 insertions(+), 33 deletions(-) diff --git a/Source/Core/Common/MathUtil.cpp b/Source/Core/Common/MathUtil.cpp index bdb45e89f9..75c7f0278a 100644 --- a/Source/Core/Common/MathUtil.cpp +++ b/Source/Core/Common/MathUtil.cpp @@ -90,17 +90,16 @@ u32 ClassifyFloat(float fvalue) } } -const int frsqrte_expected_base[] = { - 0x3ffa000, 0x3c29000, 0x38aa000, 0x3572000, 0x3279000, 0x2fb7000, 0x2d26000, 0x2ac0000, - 0x2881000, 0x2665000, 0x2468000, 0x2287000, 0x20c1000, 0x1f12000, 0x1d79000, 0x1bf4000, - 0x1a7e800, 0x17cb800, 0x1552800, 0x130c000, 0x10f2000, 0x0eff000, 0x0d2e000, 0x0b7c000, - 0x09e5000, 0x0867000, 0x06ff000, 0x05ab800, 0x046a000, 0x0339800, 0x0218800, 0x0105800, -}; -const int frsqrte_expected_dec[] = { - 0x7a4, 0x700, 0x670, 0x5f2, 0x584, 0x524, 0x4cc, 0x47e, 0x43a, 0x3fa, 0x3c2, - 0x38e, 0x35e, 0x332, 0x30a, 0x2e6, 0x568, 0x4f3, 0x48d, 0x435, 0x3e7, 0x3a2, - 0x365, 0x32e, 0x2fc, 0x2d0, 0x2a8, 0x283, 0x261, 0x243, 0x226, 0x20b, -}; +const std::array<BaseAndDec, 32> frsqrte_expected = {{ + {0x3ffa000, 0x7a4}, {0x3c29000, 0x700}, {0x38aa000, 0x670}, {0x3572000, 0x5f2}, + {0x3279000, 0x584}, {0x2fb7000, 0x524}, {0x2d26000, 0x4cc}, {0x2ac0000, 0x47e}, + {0x2881000, 0x43a}, {0x2665000, 0x3fa}, {0x2468000, 0x3c2}, {0x2287000, 0x38e}, + {0x20c1000, 0x35e}, {0x1f12000, 0x332}, {0x1d79000, 0x30a}, {0x1bf4000, 0x2e6}, + {0x1a7e800, 0x568}, {0x17cb800, 0x4f3}, {0x1552800, 0x48d}, {0x130c000, 0x435}, + {0x10f2000, 0x3e7}, {0x0eff000, 0x3a2}, {0x0d2e000, 0x365}, {0x0b7c000, 0x32e}, + {0x09e5000, 0x2fc}, {0x0867000, 0x2d0}, {0x06ff000, 0x2a8}, {0x05ab800, 0x283}, + {0x046a000, 0x261}, {0x0339800, 0x243}, {0x0218800, 
0x226}, {0x0105800, 0x20b}, +}}; double ApproximateReciprocalSquareRoot(double val) { @@ -154,21 +153,20 @@ double ApproximateReciprocalSquareRoot(double val) int i = (int)(mantissa >> 37); vali = sign | exponent; int index = i / 2048 + (odd_exponent ? 16 : 0); - vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; + auto& entry = frsqrte_expected[index]; + vali |= (s64)(entry.m_base - entry.m_dec * (i % 2048)) << 26; return valf; } -const int fres_expected_base[] = { - 0x7ff800, 0x783800, 0x70ea00, 0x6a0800, 0x638800, 0x5d6200, 0x579000, 0x520800, - 0x4cc800, 0x47ca00, 0x430800, 0x3e8000, 0x3a2c00, 0x360800, 0x321400, 0x2e4a00, - 0x2aa800, 0x272c00, 0x23d600, 0x209e00, 0x1d8800, 0x1a9000, 0x17ae00, 0x14f800, - 0x124400, 0x0fbe00, 0x0d3800, 0x0ade00, 0x088400, 0x065000, 0x041c00, 0x020c00, -}; -const int fres_expected_dec[] = { - 0x3e1, 0x3a7, 0x371, 0x340, 0x313, 0x2ea, 0x2c4, 0x2a0, 0x27f, 0x261, 0x245, - 0x22a, 0x212, 0x1fb, 0x1e5, 0x1d1, 0x1be, 0x1ac, 0x19b, 0x18b, 0x17c, 0x16e, - 0x15b, 0x15b, 0x143, 0x143, 0x12d, 0x12d, 0x11a, 0x11a, 0x108, 0x106, -}; +const std::array<BaseAndDec, 32> fres_expected = {{ + {0x7ff800, 0x3e1}, {0x783800, 0x3a7}, {0x70ea00, 0x371}, {0x6a0800, 0x340}, {0x638800, 0x313}, + {0x5d6200, 0x2ea}, {0x579000, 0x2c4}, {0x520800, 0x2a0}, {0x4cc800, 0x27f}, {0x47ca00, 0x261}, + {0x430800, 0x245}, {0x3e8000, 0x22a}, {0x3a2c00, 0x212}, {0x360800, 0x1fb}, {0x321400, 0x1e5}, + {0x2e4a00, 0x1d1}, {0x2aa800, 0x1be}, {0x272c00, 0x1ac}, {0x23d600, 0x19b}, {0x209e00, 0x18b}, + {0x1d8800, 0x17c}, {0x1a9000, 0x16e}, {0x17ae00, 0x15b}, {0x14f800, 0x15b}, {0x124400, 0x143}, + {0x0fbe00, 0x143}, {0x0d3800, 0x12d}, {0x0ade00, 0x12d}, {0x088400, 0x11a}, {0x065000, 0x11a}, + {0x041c00, 0x108}, {0x020c00, 0x106}, +}}; // Used by fres and ps_res. 
double ApproximateReciprocal(double val) @@ -213,9 +211,9 @@ double ApproximateReciprocal(double val) exponent = (0x7FDLL << 52) - exponent; int i = (int)(mantissa >> 37); + auto& entry = fres_expected[i / 1024]; vali = sign | exponent; - vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) - << 29; + vali |= (s64)(entry.m_base - (entry.m_dec * (i % 1024) + 1) / 2) << 29; return valf; } diff --git a/Source/Core/Common/MathUtil.h b/Source/Core/Common/MathUtil.h index e91358c5c1..7e48e38d80 100644 --- a/Source/Core/Common/MathUtil.h +++ b/Source/Core/Common/MathUtil.h @@ -5,6 +5,7 @@ #pragma once #include +#include <array> #include #include @@ -131,10 +132,13 @@ u32 ClassifyDouble(double dvalue); // More efficient float version. u32 ClassifyFloat(float fvalue); -extern const int frsqrte_expected_base[]; -extern const int frsqrte_expected_dec[]; -extern const int fres_expected_base[]; -extern const int fres_expected_dec[]; +struct BaseAndDec +{ + int m_base; + int m_dec; +}; +extern const std::array<BaseAndDec, 32> frsqrte_expected; +extern const std::array<BaseAndDec, 32> fres_expected; // PowerPC approximation algorithms double ApproximateReciprocalSquareRoot(double val); diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 876d24c73e..dce31b4d4b 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -69,13 +69,20 @@ void CommonAsmRoutines::GenFrsqrte() AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F)); XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 
16 : 0); + PUSH(RSCRATCH2); + MOV(64, R(RSCRATCH2), ImmPtr(GetConstantFromPool(MathUtil::frsqrte_expected))); + static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size"); + SHR(64, R(RSCRATCH), Imm8(37)); AND(32, R(RSCRATCH), Imm32(0x7FF)); - IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_dec))); + IMUL(32, RSCRATCH, + MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec))); MOV(32, R(RSCRATCH_EXTRA), - MScaled(RSCRATCH_EXTRA, SCALE_4, PtrOffset(MathUtil::frsqrte_expected_base))); + MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base))); SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); SHL(64, R(RSCRATCH_EXTRA), Imm8(26)); + + POP(RSCRATCH2); OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - // frsqrte_expected_dec[index] * (i % 2048)) << 26; MOVQ_xmm(XMM0, R(RSCRATCH2)); @@ -140,13 +147,22 @@ void CommonAsmRoutines::GenFres() AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024 AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024 - IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_dec))); + PUSH(RSCRATCH_EXTRA); + MOV(64, R(RSCRATCH_EXTRA), ImmPtr(GetConstantFromPool(MathUtil::fres_expected))); + static_assert(sizeof(MathUtil::BaseAndDec) == 8, "Unable to use SCALE_8; incorrect size"); + + IMUL(32, RSCRATCH, + MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_dec))); ADD(32, R(RSCRATCH), Imm8(1)); SHR(32, R(RSCRATCH), Imm8(1)); - MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, PtrOffset(MathUtil::fres_expected_base))); + MOV(32, R(RSCRATCH2), + MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(MathUtil::BaseAndDec, m_base))); SUB(32, R(RSCRATCH2), R(RSCRATCH)); SHL(64, R(RSCRATCH2), Imm8(29)); + + POP(RSCRATCH_EXTRA); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - // (fres_expected_dec[i / 1024] * (i % 
1024) + 1) / 2) // << 29