diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index 9b71997270..474ead9590 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -26,20 +26,6 @@ namespace const int kWRegSizeInBits = 32; const int kXRegSizeInBits = 64; -// The below few functions are taken from V8. -int CountLeadingZeros(uint64_t value, int width) -{ - // TODO(jbramley): Optimize this for ARM64 hosts. - int count = 0; - uint64_t bit_test = 1ULL << (width - 1); - while ((count < width) && ((bit_test & value) == 0)) - { - count++; - bit_test >>= 1; - } - return count; -} - uint64_t LargestPowerOf2Divisor(uint64_t value) { return value & -(int64_t)value; @@ -155,8 +141,8 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned // Compute the repeat distance d, and set up a bitmask covering the basic // unit of repetition (i.e. a word with the bottom d bits set). Also, in all // of these cases the N bit of the output will be zero. - clz_a = CountLeadingZeros(a, kXRegSizeInBits); - int clz_c = CountLeadingZeros(c, kXRegSizeInBits); + clz_a = Common::CountLeadingZeros(a); + int clz_c = Common::CountLeadingZeros(c); d = clz_a - clz_c; mask = ((UINT64_C(1) << d) - 1); out_n = 0; @@ -182,7 +168,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned // of set bits in our word, meaning that we have the trivial case of // d == 64 and only one 'repetition'. Set up all the same variables as in // the general case above, and set the N bit in the output. 
- clz_a = CountLeadingZeros(a, kXRegSizeInBits); + clz_a = Common::CountLeadingZeros(a); d = 64; mask = ~UINT64_C(0); out_n = 1; @@ -214,7 +200,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned 0x5555555555555555UL, }}; - int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57; + int multiplier_idx = Common::CountLeadingZeros((u64)d) - 57; // Ensure that the index to the multipliers array is within bounds. DEBUG_ASSERT((multiplier_idx >= 0) && (static_cast(multiplier_idx) < multipliers.size())); @@ -233,7 +219,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned // Count the set bits in our basic stretch. The special case of clz(0) == -1 // makes the answer come out right for stretches that reach the very top of // the word (e.g. numbers like 0xffffc00000000000). - int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits); + int clz_b = (b == 0) ? -1 : Common::CountLeadingZeros(b); int s = clz_a - clz_b; // Decide how many bits to rotate right by, to put the low bit of that basic diff --git a/Source/Core/Common/BitUtils.h b/Source/Core/Common/BitUtils.h index 7a1b65f587..fbc6ec2ba6 100644 --- a/Source/Core/Common/BitUtils.h +++ b/Source/Core/Common/BitUtils.h @@ -11,6 +11,10 @@ #include #include +#ifdef _MSC_VER +#include +#endif + namespace Common { /// @@ -357,4 +361,46 @@ T ExpandValue(T value, size_t left_shift_amount) (T(-ExtractBit<0>(value)) >> (BitSize() - left_shift_amount)); } +/// Counts the leading zero bits of a 64-bit value; returns 64 when value is 0. +constexpr int CountLeadingZeros(uint64_t value) +{ +#if defined(__GNUC__) + return value ? __builtin_clzll(value) : 64; +#elif defined(_MSC_VER) && defined(_M_ARM_64) + return _CountLeadingZeros64(value); +#elif defined(_MSC_VER) && defined(_M_X86_64) + unsigned long index = 0; + return _BitScanReverse64(&index, value) ? 
63 - index : 64; +#else + int result = 64; + while (value) + { + result--; + value >>= 1; + } + return result; +#endif +} + +/// Counts the leading zero bits of a 32-bit value; returns 32 when value is 0. +constexpr int CountLeadingZeros(uint32_t value) +{ +#if defined(__GNUC__) + return value ? __builtin_clz(value) : 32; +#elif defined(_MSC_VER) && defined(_M_ARM_64) + return _CountLeadingZeros(value); +#elif defined(_MSC_VER) && defined(_M_X86_64) + unsigned long index = 0; + return _BitScanReverse(&index, value) ? 31 - index : 32; +#else + int result = 32; + while (value) + { + result--; + value >>= 1; + } + return result; +#endif +} + } // namespace Common diff --git a/Source/Core/Common/MathUtil.h b/Source/Core/Common/MathUtil.h index 5ad9ee0dcd..18a0c305ce 100644 --- a/Source/Core/Common/MathUtil.h +++ b/Source/Core/Common/MathUtil.h @@ -9,12 +9,9 @@ #include #include +#include "Common/BitUtils.h" #include "Common/CommonTypes.h" -#ifdef _MSC_VER -#include -#endif - namespace MathUtil { constexpr double TAU = 6.2831853071795865; @@ -154,21 +151,5 @@ float MathFloatVectorSum(const std::vector&); // Rounds down. 
0 -> undefined inline int IntLog2(u64 val) { -#if defined(__GNUC__) - return 63 - __builtin_clzll(val); - -#elif defined(_MSC_VER) - unsigned long result = ULONG_MAX; - _BitScanReverse64(&result, val); - return result; - -#else - int result = -1; - while (val != 0) - { - val >>= 1; - ++result; - } - return result; -#endif + return 63 - Common::CountLeadingZeros(val); } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp index 0f57158c98..bd08ae5578 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp @@ -236,17 +236,7 @@ void Interpreter::cmpl(UGeckoInstruction inst) void Interpreter::cntlzwx(UGeckoInstruction inst) { - u32 val = rGPR[inst.RS]; - u32 mask = 0x80000000; - - int i = 0; - for (; i < 32; i++, mask >>= 1) - { - if (val & mask) - break; - } - - rGPR[inst.RA] = i; + rGPR[inst.RA] = Common::CountLeadingZeros(rGPR[inst.RS]); if (inst.Rc) Helper_UpdateCR0(rGPR[inst.RA]); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index b0a9cf4cf1..db3138fd5f 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1956,14 +1956,7 @@ void Jit64::cntlzwx(UGeckoInstruction inst) if (gpr.IsImm(s)) { - u32 mask = 0x80000000; - u32 i = 0; - for (; i < 32; i++, mask >>= 1) - { - if (gpr.Imm32(s) & mask) - break; - } - gpr.SetImmediate32(a, i); + gpr.SetImmediate32(a, Common::CountLeadingZeros(gpr.Imm32(s))); } else { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 8a2062b239..645ffc4af5 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -368,11 +368,7 @@ void JitArm64::cntlzwx(UGeckoInstruction inst) if (gpr.IsImm(s)) { -#ifdef 
_MSC_VER - gpr.SetImmediate(a, _CountLeadingZeros(gpr.GetImm(s))); -#else - gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s))); -#endif + gpr.SetImmediate(a, Common::CountLeadingZeros(gpr.GetImm(s))); if (inst.Rc) ComputeRC0(gpr.GetImm(a)); }