package com.twitter.search.common.encoding.features;

/**
|
|
* Util used to:
|
|
* - Encode a positive Java float into a single byte float
|
|
* - Decode a single byte into a positive Java float
|
|
*
|
|
* Configuration:
|
|
* - Exponent: higher 4 bits, base 10.
|
|
* - Mantissa: lower 4 bit, representing 1.0 to 9.0
|
|
* - Exponent bias is 1.
|
|
*
|
|
* Formula:
|
|
* Max(Mantissa, 9) * 10 ^ (Exponent - 1)
|
|
*
|
|
* Smallest float: 0.0 (0000 0000)
|
|
* Smallest positive float: 1.0 * 10^-1 (0000 0001)
|
|
* Largest float: 9.0 * 10^13 (1110 1111)
|
|
* Infinity: (1111 0000)
|
|
* NaN: (1111 1000)
|
|
*/
|
|
public final class SingleBytePositiveFloatUtil {
|
|
private SingleBytePositiveFloatUtil() { }
|
|
|
|
// 4 bits mantissa. Range [1.0, 10.0) is divided into 16 steps
|
|
public static final byte MAX_BYTE_VALUE = (byte) 0xEF;
|
|
public static final byte INFINITY = (byte) 0xF0;
|
|
public static final byte NOT_A_NUMBER = (byte) 0xF8;
|
|
private static final float STEP_SIZE = 1.0f;
|
|
private static final int EXPONENT_BIAS = 1;
|
|
private static final byte MIN_EXPONENT = -EXPONENT_BIAS;
|
|
private static final int MAX_EXPONENT = 14 - EXPONENT_BIAS;
|
|
private static final byte MANTISSA_MASK = 0x0F;
|
|
|
|
/**
|
|
* Converts the given float into a single byte floating point number.
|
|
* This is used in the updater and OK to be a bit slow.
|
|
*/
|
|
public static byte toSingleBytePositiveFloat(float f) {
|
|
if (f < 0) {
|
|
throw new UnsupportedOperationException(
|
|
"Cannot encode negative floats into SingleBytePostiveFloat.");
|
|
}
|
|
|
|
if (Float.compare(f, Float.POSITIVE_INFINITY) == 0) {
|
|
return INFINITY;
|
|
}
|
|
|
|
if (Float.compare(f, Float.NaN) == 0) {
|
|
return NOT_A_NUMBER;
|
|
}
|
|
|
|
int mantissa = 0;
|
|
int exponent = (int) Math.floor(Math.log10(f));
|
|
// Overflow (Number too large), just return the largest possible value
|
|
if (exponent > MAX_EXPONENT) {
|
|
return MAX_BYTE_VALUE;
|
|
}
|
|
|
|
// Underflow (Number too small), just return 0
|
|
if (exponent < MIN_EXPONENT) {
|
|
return 0;
|
|
}
|
|
|
|
int frac = Math.round(f / (float) Math.pow(10.0f, exponent) / STEP_SIZE);
|
|
mantissa = fractionToMantissaTable[frac];
|
|
|
|
return (byte) (((exponent + EXPONENT_BIAS) << 4) | mantissa);
|
|
}
|
|
|
|
/**
|
|
* Called in Earlybird per hit and needs to be fast.
|
|
*/
|
|
public static float toJavaFloat(byte b) {
|
|
return BYTE_TO_FLOAT_CONVERSION_TABLE[b & 0xff];
|
|
}
|
|
|
|
// Table used for converting mantissa into a significant
|
|
private static float[] mantissaToFractionTable = {
|
|
// Decimal Matisa value
|
|
STEP_SIZE * 0, // 0000
|
|
STEP_SIZE * 1, // 0001
|
|
STEP_SIZE * 1, // 0010
|
|
STEP_SIZE * 2, // 0011
|
|
STEP_SIZE * 2, // 0100
|
|
STEP_SIZE * 3, // 0101
|
|
STEP_SIZE * 3, // 0110
|
|
STEP_SIZE * 4, // 0111
|
|
STEP_SIZE * 4, // 1000
|
|
STEP_SIZE * 5, // 1001
|
|
STEP_SIZE * 5, // 1010
|
|
STEP_SIZE * 6, // 1011
|
|
STEP_SIZE * 6, // 1100
|
|
STEP_SIZE * 7, // 1101
|
|
STEP_SIZE * 8, // 1110
|
|
STEP_SIZE * 9 // 1111
|
|
};
|
|
|
|
// Table used for converting fraction into mantissa.
|
|
// Reverse operation of the above
|
|
private static int[] fractionToMantissaTable = {
|
|
0, // 0
|
|
1, // 1
|
|
3, // 2
|
|
5, // 3
|
|
7, // 4
|
|
9, // 5
|
|
11, // 6
|
|
13, // 7
|
|
14, // 8
|
|
15, // 9
|
|
15, // 10 (Edge case: because we round the fraction, we can get 10 here.)
|
|
};
|
|
|
|
public static final byte LARGEST_FRACTION_UNDER_ONE = (byte) (toSingleBytePositiveFloat(1f) - 1);
|
|
|
|
/**
|
|
* Converts the given byte to java float.
|
|
*/
|
|
private static float toJavaFloatSlow(byte b) {
|
|
if (b == INFINITY) {
|
|
return Float.POSITIVE_INFINITY;
|
|
}
|
|
|
|
if ((b & 0xff) > (INFINITY & 0xff)) {
|
|
return Float.NaN;
|
|
}
|
|
|
|
int exponent = ((b & 0xff) >>> 4) - EXPONENT_BIAS;
|
|
int mantissa = b & MANTISSA_MASK;
|
|
return mantissaToFractionTable[mantissa] * (float) Math.pow(10.0f, exponent);
|
|
}
|
|
|
|
// Cached results from byte to float conversion
|
|
private static final float[] BYTE_TO_FLOAT_CONVERSION_TABLE = new float[256];
|
|
private static final double[] BYTE_TO_LOG2_CONVERSION_TABLE = new double[256];
|
|
private static final byte[] OLD_TO_NEW_BYTE_CONVERSION_TABLE = new byte[256];
|
|
|
|
static {
|
|
LogByteNormalizer normalizer = new LogByteNormalizer();
|
|
for (int i = 0; i < 256; i++) {
|
|
byte b = (byte) i;
|
|
BYTE_TO_FLOAT_CONVERSION_TABLE[i] = toJavaFloatSlow(b);
|
|
BYTE_TO_LOG2_CONVERSION_TABLE[i] =
|
|
0xff & normalizer.normalize(BYTE_TO_FLOAT_CONVERSION_TABLE[i]);
|
|
if (b == 0) {
|
|
OLD_TO_NEW_BYTE_CONVERSION_TABLE[i] = 0;
|
|
} else if (b > 0) {
|
|
OLD_TO_NEW_BYTE_CONVERSION_TABLE[i] =
|
|
toSingleBytePositiveFloat((float) normalizer.unnormLowerBound(b));
|
|
} else {
|
|
// should not get here.
|
|
OLD_TO_NEW_BYTE_CONVERSION_TABLE[i] = MAX_BYTE_VALUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Convert a normalized byte to the log2() version of its original value
|
|
*/
|
|
static double toLog2Double(byte b) {
|
|
return BYTE_TO_LOG2_CONVERSION_TABLE[b & 0xff];
|
|
}
|
|
}
|