Merge pull request #9374 from MerryMage/clz

BitUtils: Add CountLeadingZeros
This commit is contained in:
LC 2020-12-27 22:18:46 -05:00 committed by GitHub
commit c163bc3187
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 54 additions and 64 deletions

View File

@ -26,20 +26,6 @@ namespace
const int kWRegSizeInBits = 32;
const int kXRegSizeInBits = 64;
// The below few functions are taken from V8.
// Counts how many of the top bits of the low `width` bits of `value` are zero,
// scanning downwards from bit (width - 1). Bits at or above `width` are
// ignored. Returns `width` when none of the inspected bits is set.
int CountLeadingZeros(uint64_t value, int width)
{
  // TODO(jbramley): Optimize this for ARM64 hosts.
  uint64_t probe = 1ULL << (width - 1);
  int zeros = 0;
  for (; zeros < width; ++zeros, probe >>= 1)
  {
    // Stop at the first set bit encountered from the top.
    if ((value & probe) != 0)
      break;
  }
  return zeros;
}
uint64_t LargestPowerOf2Divisor(uint64_t value)
{
return value & -(int64_t)value;
@ -155,8 +141,8 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
// Compute the repeat distance d, and set up a bitmask covering the basic
// unit of repetition (i.e. a word with the bottom d bits set). Also, in all
// of these cases the N bit of the output will be zero.
clz_a = CountLeadingZeros(a, kXRegSizeInBits);
int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
clz_a = Common::CountLeadingZeros(a);
int clz_c = Common::CountLeadingZeros(c);
d = clz_a - clz_c;
mask = ((UINT64_C(1) << d) - 1);
out_n = 0;
@ -182,7 +168,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
// of set bits in our word, meaning that we have the trivial case of
// d == 64 and only one 'repetition'. Set up all the same variables as in
// the general case above, and set the N bit in the output.
clz_a = CountLeadingZeros(a, kXRegSizeInBits);
clz_a = Common::CountLeadingZeros(a);
d = 64;
mask = ~UINT64_C(0);
out_n = 1;
@ -214,7 +200,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
0x5555555555555555UL,
}};
int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
int multiplier_idx = Common::CountLeadingZeros((u64)d) - 57;
// Ensure that the index to the multipliers array is within bounds.
DEBUG_ASSERT((multiplier_idx >= 0) && (static_cast<size_t>(multiplier_idx) < multipliers.size()));
@ -233,7 +219,7 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned
// Count the set bits in our basic stretch. The special case of clz(0) == -1
// makes the answer come out right for stretches that reach the very top of
// the word (e.g. numbers like 0xffffc00000000000).
int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
int clz_b = (b == 0) ? -1 : Common::CountLeadingZeros(b);
int s = clz_a - clz_b;
// Decide how many bits to rotate right by, to put the low bit of that basic

View File

@ -11,6 +11,10 @@
#include <initializer_list>
#include <type_traits>
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace Common
{
///
@ -357,4 +361,44 @@ T ExpandValue(T value, size_t left_shift_amount)
(T(-ExtractBit<0>(value)) >> (BitSize<T>() - left_shift_amount));
}
/// Counts the zero bits above the most significant set bit of a 64-bit value.
///
/// @param value  The value to inspect.
/// @return The number of leading zero bits. A value of 0 yields 64 on the
///         GCC/Clang, x86-64 MSVC and portable paths; the ARM64 MSVC path
///         returns whatever the intrinsic reports.
constexpr int CountLeadingZeros(uint64_t value)
{
#if defined(__GNUC__)
  // The builtin's result is undefined for 0, so that case is handled explicitly.
  return value ? __builtin_clzll(value) : 64;
// NOTE(review): MSVC documents _M_ARM64 and _M_X64 as its architecture macros;
// _M_ARM_64 / _M_X86_64 may never be defined, in which case MSVC builds use the
// portable loop below. Confirm before relying on the intrinsic paths.
#elif defined(_MSC_VER) && defined(_M_ARM_64)
  return _CountLeadingZeros64(value);
#elif defined(_MSC_VER) && defined(_M_X86_64)
  unsigned long index = 0;
  // _BitScanReverse64 reports failure (0) when no bit is set.
  return _BitScanReverse64(&index, value) ? 63 - static_cast<int>(index) : 64;
#else
  // Portable fallback: every shift that leaves bits behind removes one leading zero.
  int result = 64;
  while (value)
  {
    result--;
    value >>= 1;
  }
  return result;
#endif
}
/// Counts the zero bits above the most significant set bit of a 32-bit value.
///
/// @param value  The value to inspect.
/// @return The number of leading zero bits. A value of 0 yields 32 on the
///         GCC/Clang, x86-64 MSVC and portable paths; the ARM64 MSVC path
///         returns whatever the intrinsic reports.
constexpr int CountLeadingZeros(uint32_t value)
{
#if defined(__GNUC__)
  // The builtin's result is undefined for 0, so that case is handled explicitly.
  return value ? __builtin_clz(value) : 32;
// NOTE(review): MSVC documents _M_ARM64 and _M_X64 as its architecture macros;
// _M_ARM_64 / _M_X86_64 may never be defined, in which case MSVC builds use the
// portable loop below. Confirm before relying on the intrinsic paths.
#elif defined(_MSC_VER) && defined(_M_ARM_64)
  return _CountLeadingZeros(value);
#elif defined(_MSC_VER) && defined(_M_X86_64)
  unsigned long index = 0;
  // _BitScanReverse reports failure (0) when no bit is set.
  return _BitScanReverse(&index, value) ? 31 - static_cast<int>(index) : 32;
#else
  // Portable fallback: every shift that leaves bits behind removes one leading zero.
  int result = 32;
  while (value)
  {
    result--;
    value >>= 1;
  }
  return result;
#endif
}
} // namespace Common

View File

@ -9,12 +9,9 @@
#include <type_traits>
#include <vector>
#include "Common/BitUtils.h"
#include "Common/CommonTypes.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace MathUtil
{
constexpr double TAU = 6.2831853071795865;
@ -154,21 +151,5 @@ float MathFloatVectorSum(const std::vector<float>&);
// Integer base-2 logarithm of val, rounded down.
// The result for val == 0 is undefined.
inline int IntLog2(u64 val)
{
// Each preprocessor branch below ends in its own return statement.
#if defined(__GNUC__)
// 63 minus the leading-zero count reported by the builtin.
return 63 - __builtin_clzll(val);
#elif defined(_MSC_VER)
// result is pre-set to ULONG_MAX and returned after the intrinsic call.
unsigned long result = ULONG_MAX;
_BitScanReverse64(&result, val);
return result;
#else
// Portable fallback: starts at -1 and adds one for every right shift needed
// to bring val down to zero.
int result = -1;
while (val != 0)
{
val >>= 1;
++result;
}
return result;
#endif
// NOTE(review): every preprocessor branch above already returns, so this
// statement is unreachable as written. It looks like the intended replacement
// for the whole #if block - confirm which version should remain.
return 63 - Common::CountLeadingZeros(val);
}

View File

@ -236,17 +236,7 @@ void Interpreter::cmpl(UGeckoInstruction inst)
void Interpreter::cntlzwx(UGeckoInstruction inst)
{
u32 val = rGPR[inst.RS];
u32 mask = 0x80000000;
int i = 0;
for (; i < 32; i++, mask >>= 1)
{
if (val & mask)
break;
}
rGPR[inst.RA] = i;
rGPR[inst.RA] = Common::CountLeadingZeros(rGPR[inst.RS]);
if (inst.Rc)
Helper_UpdateCR0(rGPR[inst.RA]);

View File

@ -1956,14 +1956,7 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
if (gpr.IsImm(s))
{
u32 mask = 0x80000000;
u32 i = 0;
for (; i < 32; i++, mask >>= 1)
{
if (gpr.Imm32(s) & mask)
break;
}
gpr.SetImmediate32(a, i);
gpr.SetImmediate32(a, Common::CountLeadingZeros(gpr.Imm32(s)));
}
else
{

View File

@ -368,11 +368,7 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
if (gpr.IsImm(s))
{
#ifdef _MSC_VER
gpr.SetImmediate(a, _CountLeadingZeros(gpr.GetImm(s)));
#else
gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s)));
#endif
gpr.SetImmediate(a, Common::CountLeadingZeros(gpr.GetImm(s)));
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
}