the-algorithm/src/java/com/twitter/search/common/encoding/features/SingleBytePositiveFloatUtil...

165 lines
5.1 KiB
Java

package com.twitter.search.common.encoding.features;
/**
 * Utility that packs a non-negative Java float into a single byte and unpacks it again.
 *
 * <p>Byte layout:
 * <ul>
 *   <li>Exponent: upper 4 bits, base 10, stored with a bias of 1.</li>
 *   <li>Mantissa: lower 4 bits, a code that is translated through a lookup table into a
 *       fraction between 0.0 and 9.0.</li>
 * </ul>
 *
 * <p>Decoded value:
 * <pre>
 *   fraction(mantissa bits) * 10 ^ (exponent bits - 1)
 * </pre>
 *
 * <p>Notable encodings:
 * <ul>
 *   <li>Smallest float: 0.0 (0000 0000)</li>
 *   <li>Smallest positive float: 1.0 * 10^-1 (0000 0001)</li>
 *   <li>Largest float: 9.0 * 10^13 (1110 1111)</li>
 *   <li>Infinity: (1111 0000)</li>
 *   <li>NaN: (1111 1000) when encoding; every byte above 1111 0000 (unsigned) decodes
 *       to NaN.</li>
 * </ul>
 */
public final class SingleBytePositiveFloatUtil {
  private SingleBytePositiveFloatUtil() { }

  /** Encoding of the largest finite value: exponent bits 1110, mantissa bits 1111. */
  public static final byte MAX_BYTE_VALUE = (byte) 0xEF;
  /** Encoding of positive infinity. */
  public static final byte INFINITY = (byte) 0xF0;
  /** Encoding emitted for NaN input. */
  public static final byte NOT_A_NUMBER = (byte) 0xF8;

  // Distance between two adjacent representable fractions.
  private static final float STEP_SIZE = 1.0f;
  private static final int EXPONENT_BIAS = 1;
  // Smallest and largest decimal exponents that fit in the 4 exponent bits
  // (the exponent pattern 1111 is reserved for Infinity / NaN).
  private static final byte MIN_EXPONENT = -EXPONENT_BIAS;
  private static final int MAX_EXPONENT = 14 - EXPONENT_BIAS;
  private static final byte MANTISSA_MASK = 0x0F;

  // Table used for converting the 4 mantissa bits into a fraction (significand).
  // Declared before LARGEST_FRACTION_UNDER_ONE and the static initializer below, both of
  // which run conversion code during class initialization.
  private static final float[] MANTISSA_TO_FRACTION_TABLE = {
    //   Decimal        Mantissa value
    STEP_SIZE * 0,   // 0000
    STEP_SIZE * 1,   // 0001
    STEP_SIZE * 1,   // 0010
    STEP_SIZE * 2,   // 0011
    STEP_SIZE * 2,   // 0100
    STEP_SIZE * 3,   // 0101
    STEP_SIZE * 3,   // 0110
    STEP_SIZE * 4,   // 0111
    STEP_SIZE * 4,   // 1000
    STEP_SIZE * 5,   // 1001
    STEP_SIZE * 5,   // 1010
    STEP_SIZE * 6,   // 1011
    STEP_SIZE * 6,   // 1100
    STEP_SIZE * 7,   // 1101
    STEP_SIZE * 8,   // 1110
    STEP_SIZE * 9    // 1111
  };

  // Table used for converting a rounded fraction into mantissa bits.
  // Reverse operation of the table above.
  private static final int[] FRACTION_TO_MANTISSA_TABLE = {
    0,   // 0
    1,   // 1
    3,   // 2
    5,   // 3
    7,   // 4
    9,   // 5
    11,  // 6
    13,  // 7
    14,  // 8
    15,  // 9
    15,  // 10 (Edge case: because we round the fraction, we can get 10 here.)
  };

  /**
   * The encoding of 1.0f minus one.
   *
   * <p>NOTE(review): 1.0f encodes to 0x11, so this constant is 0x10, which decodes to 0.0
   * (mantissa bits 0000). The encoding of 0.9 is 0x0F. Confirm which value callers expect
   * before relying on the name; the value is left unchanged here.
   */
  public static final byte LARGEST_FRACTION_UNDER_ONE = (byte) (toSingleBytePositiveFloat(1f) - 1);

  /**
   * Converts the given float into a single byte floating point number.
   * This is used in the updater and OK to be a bit slow.
   *
   * <p>Values too large to represent saturate to {@link #MAX_BYTE_VALUE}; values too small
   * (including zero) collapse to 0.
   *
   * @param f the value to encode; must not be negative
   * @return the single byte encoding of {@code f}
   * @throws UnsupportedOperationException if {@code f} is negative
   */
  public static byte toSingleBytePositiveFloat(float f) {
    if (f < 0) {
      throw new UnsupportedOperationException(
          "Cannot encode negative floats into SingleBytePositiveFloat.");
    }
    if (f == Float.POSITIVE_INFINITY) {
      return INFINITY;
    }
    if (Float.isNaN(f)) {
      return NOT_A_NUMBER;
    }

    // For f == 0 the logarithm is -Infinity, which casts to Integer.MIN_VALUE and is
    // handled by the underflow branch below.
    int exponent = (int) Math.floor(Math.log10(f));

    // Overflow (Number too large), just return the largest possible value
    if (exponent > MAX_EXPONENT) {
      return MAX_BYTE_VALUE;
    }

    // Underflow (Number too small), just return 0
    if (exponent < MIN_EXPONENT) {
      return 0;
    }

    // Scale f by its power of ten and round to the nearest step; rounding can yield 10,
    // which the table maps onto the same mantissa as 9.
    int frac = Math.round(f / (float) Math.pow(10.0f, exponent) / STEP_SIZE);
    int mantissa = FRACTION_TO_MANTISSA_TABLE[frac];

    return (byte) (((exponent + EXPONENT_BIAS) << 4) | mantissa);
  }

  /**
   * Decodes a single byte float through a precomputed 256-entry table.
   * Called in Earlybird per hit and needs to be fast.
   *
   * @param b the encoded byte
   * @return the decoded float
   */
  public static float toJavaFloat(byte b) {
    return BYTE_TO_FLOAT_CONVERSION_TABLE[b & 0xff];
  }

  /**
   * Converts the given byte to java float by computing it from the bit fields.
   * Only used to populate the lookup table behind {@link #toJavaFloat(byte)}.
   */
  private static float toJavaFloatSlow(byte b) {
    if (b == INFINITY) {
      return Float.POSITIVE_INFINITY;
    }

    // Everything above the Infinity encoding (compared as unsigned) is NaN.
    if ((b & 0xff) > (INFINITY & 0xff)) {
      return Float.NaN;
    }

    int exponent = ((b & 0xff) >>> 4) - EXPONENT_BIAS;
    int mantissa = b & MANTISSA_MASK;
    return MANTISSA_TO_FRACTION_TABLE[mantissa] * (float) Math.pow(10.0f, exponent);
  }

  // Cached results from byte to float conversion
  private static final float[] BYTE_TO_FLOAT_CONVERSION_TABLE = new float[256];
  // Cached LogByteNormalizer output for the decoded value of every byte
  private static final double[] BYTE_TO_LOG2_CONVERSION_TABLE = new double[256];
  // Maps a LogByteNormalizer byte to the single byte float encoding of its lower bound
  private static final byte[] OLD_TO_NEW_BYTE_CONVERSION_TABLE = new byte[256];

  static {
    LogByteNormalizer normalizer = new LogByteNormalizer();
    for (int i = 0; i < 256; i++) {
      byte b = (byte) i;
      BYTE_TO_FLOAT_CONVERSION_TABLE[i] = toJavaFloatSlow(b);
      BYTE_TO_LOG2_CONVERSION_TABLE[i] =
          0xff & normalizer.normalize(BYTE_TO_FLOAT_CONVERSION_TABLE[i]);
      if (b == 0) {
        OLD_TO_NEW_BYTE_CONVERSION_TABLE[i] = 0;
      } else if (b > 0) {
        OLD_TO_NEW_BYTE_CONVERSION_TABLE[i] =
            toSingleBytePositiveFloat((float) normalizer.unnormLowerBound(b));
      } else {
        // Reached for every i >= 128, where (byte) i is negative. Presumably the old
        // format never produces such bytes (TODO confirm); they map to the largest value.
        OLD_TO_NEW_BYTE_CONVERSION_TABLE[i] = MAX_BYTE_VALUE;
      }
    }
  }

  /**
   * Convert a normalized byte to the log2() version of its original value.
   *
   * <p>The result is read from a table filled at class initialization with the unsigned
   * value of {@code LogByteNormalizer.normalize} applied to the decoded float.
   *
   * @param b the encoded byte
   * @return the cached table entry for {@code b}
   */
  static double toLog2Double(byte b) {
    return BYTE_TO_LOG2_CONVERSION_TABLE[b & 0xff];
  }
}