the-algorithm/twml/libtwml/src/ops/percentile_discretizer_v2.cpp

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/util/work_sharder.h"
#include <twml.h>
#include "tensorflow_utils.h"
using namespace tensorflow;
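// Forward declaration; defined after the PercentileDiscretizerV2 kernel class below.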
void CombinedComputeDiscretizers(
    OpKernelContext*,
    int64_t,
    const twml::Map<int64_t, int64_t>&,
    int64_t);
REGISTER_OP("PercentileDiscretizerV2")
.Attr("T: {float, double}")
.Input("input_ids: int64")
.Input("input_vals: T")
.Input("bin_ids: int64")
.Input("bin_vals: T")
.Input("feature_offsets: int64")
.Input("start_compute: int64")
.Input("end_compute: int64")
.Attr("output_bits: int")
.Attr("feature_ids: tensor = { dtype: DT_INT64 }")
.Attr("feature_indices: tensor = { dtype: DT_INT64 }")
.Attr("cost_per_unit: int")
.Output("new_keys: int64")
.Output("new_vals: T")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
// TODO: check sizes
c->set_output(0, c->input(0));
c->set_output(1, c->input(0));
return Status::OK();
}).Doc(R"doc(
This operation discretizes a tensor containing continuous features (if calibrated).
- note - the choice of float or double must be consistent across inputs and outputs
Inputs
input_ids(int64): A tensor containing input feature ids (direct from data record).
input_vals: A tensor containing input values at corresponding feature ids.
- i.e. input_ids[i] <-> input_vals[i] for each i
- float or double
bin_ids(int64): A tensor containing the discretized feature id for each bin.
bin_vals: A tensor containing the bin boundaries for values of a given feature.
- float or double
feature_offsets(int64): Specifies the starting location of bins for a given feature id.
start_compute(int64 scalar tensor): which index to start the computation at
end_compute(int64 scalar tensor): which index to end the computation right before
-> for example, (start_compute, end_compute) = (0, 10) would compute on indices 0 through 9
output_bits(int): The maximum number of bits to use for the output IDs.
-> 2**output_bits must be greater than bin_ids.size
feature_ids(int64): 1D TensorProto of feature IDs seen during calibration
feature_indices(int64): 1D TensorProto of feature indices corresponding to feature_ids
-> hint: look up make_tensor_proto:
proto_init = np.array(values, dtype=np.int64)
tensor_attr = tf.make_tensor_proto(proto_init)
cost_per_unit(int): An estimate of the number of CPU cycles (or nanoseconds
if not CPU-bound) to complete a unit of work. Overestimating creates too
many shards and CPU time will be dominated by per-shard overhead, such as
Context creation. Underestimating may not fully make use of the specified
parallelism.
Outputs
new_keys(int64): The discretized feature ids, with the same shape and size as input_ids.
new_vals(float or double): The discretized values, with the same shape and size as input_vals.
Operation
Note that the discretization operation maps observation vectors to higher dimensional
observation vectors. Here, we describe this mapping.
Let a calibrated feature observation be given by (F,x), where F is the ID of the
feature, and x is some real value (i.e., continuous feature). This kind of
representation is useful for the representation of sparse vectors, where there
are many zeros.
For example, for a dense feature vector [1.2, 2.4, 3.6], we might have
(0, 1.2), (1, 2.4), and (2, 3.6), with feature IDs indicating the 0th, 1st, and 2nd
elements of the vector.
The discretizer performs the following operation (see the illustrative sketch after this doc block):
(F,x) -> (map(x|F),1).
Hence, we have that map(x|F) is a new feature ID, and the value observed for that
feature is 1. We might read map(x|F) as 'the map of x for feature F'.
For each feature F, we associate a (discrete, finite) set of new feature IDs, newIDs(F).
We will then have that map(x|F) is in the set newIDs(F) for any value of x. Each set member
of newIDs(F) is associated with a 'bin', as defined by the bin boundaries given in
the bin_vals input array. For any two different feature IDs F and G, we have that
INTERSECT(newIDs(F), newIDs(G)) is the empty set.
Example - consider an input vector with a single element, i.e. [x].
Let's discretize to one of 2 values, as follows:
Let F=0 for the ID of the single feature in the vector.
Let the bin boundary of feature F=0 be BNDRY(F) = BNDRY(0) since F=0
Let newIDs(F) = newIDs(0) = {0,1}
Let map(x|F) = map(x|0) = 0 if x<=BNDRY(0) else 1
If we had another element y in the vector, i.e. [x, y], then we might additionally
Let F=1 for element y.
Let the bin boundary be BNDRY(F) = BNDRY(1) since F=1
Let newIDs(F) = newIDs(1) = {2,3} (so as to have an empty intersection with newIDs(0))
Let map(y|F) = map(y|1) = 2 if y<=BNDRY(1) else 3
Consider vector observation [-0.1, 0.2]. We then represent this as [(0, -0.1), (1, 0.2)]
Let BNDRY(0) = BNDRY(1) = 0. When we discretize the vector, we get:
(0, -0.1) -> (map(-0.1|0), 1) = (0, 1)
(1, 0.2) -> (map( 0.2|1), 1) = (3, 1)
Our output vector is then represented sparsely as [(0, 1), (3, 1)], and the dense
representation of this could be [1, 0, 0, 1].
)doc");
template<typename T>
class PercentileDiscretizerV2 : public OpKernel {
 public:
  explicit PercentileDiscretizerV2(OpKernelConstruction* context) : OpKernel(context) {
    // get the number of output bits
    // for use with features that have not been calibrated
    OP_REQUIRES_OK(context,
                   context->GetAttr("output_bits", &output_bits_));
    OP_REQUIRES_OK(context,
                   context->GetAttr("cost_per_unit", &cost_per_unit_));
    OP_REQUIRES(context, cost_per_unit_ >= 0,
                errors::InvalidArgument("Must have cost_per_unit >= 0."));

    // construct the ID_to_index hash map
    Tensor feature_IDs;
    Tensor feature_indices;

    // extract the tensors
    OP_REQUIRES_OK(context,
                   context->GetAttr("feature_ids", &feature_IDs));
    OP_REQUIRES_OK(context,
                   context->GetAttr("feature_indices", &feature_indices));

    // for access to the data
    // the int64_t data type is set in the to_layer function of the calibrator objects in Python
    auto feature_IDs_flat = feature_IDs.flat<int64>();
    auto feature_indices_flat = feature_indices.flat<int64>();

    // verify proper dimension constraints
    OP_REQUIRES(context, feature_IDs.shape() == feature_indices.shape(),
                errors::InvalidArgument("feature_ids and feature_indices must be identical shape."));
    OP_REQUIRES(context, feature_IDs.shape().dims() == 1,
                errors::InvalidArgument("feature_ids and feature_indices must be 1D."));

    // reserve space in the hash map and fill in the values
    const int64 num_features = feature_IDs.shape().dim_size(0);
#ifdef USE_DENSE_HASH
    ID_to_index_.set_empty_key(0);
    ID_to_index_.resize(num_features);
#else
    ID_to_index_.reserve(num_features);
#endif  // USE_DENSE_HASH
    for (int64 i = 0; i < num_features; i++) {
      ID_to_index_[feature_IDs_flat(i)] = feature_indices_flat(i);
    }
  }

  void Compute(OpKernelContext* context) override {
    CombinedComputeDiscretizers(
        context,
        output_bits_,
        ID_to_index_,
        cost_per_unit_);
  }

 private:
  twml::Map<int64_t, int64_t> ID_to_index_;
  int output_bits_;
  int cost_per_unit_;
};
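// Register one CPU kernel per floating-point type permitted by the "T" attr.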
#define REGISTER(Type)                    \
  REGISTER_KERNEL_BUILDER(                \
      Name("PercentileDiscretizerV2")     \
          .Device(DEVICE_CPU)             \
          .TypeConstraint<Type>("T"),     \
      PercentileDiscretizerV2<Type>);

REGISTER(float);
REGISTER(double);
void CombinedComputeDiscretizers(
    OpKernelContext* context,
    int64_t output_bits,
    const twml::Map<int64_t, int64_t> &ID_to_index,
    int64_t cost_per_unit) {
  const Tensor& keys = context->input(0);
  const Tensor& vals = context->input(1);
  const Tensor& bin_ids = context->input(2);
  const Tensor& bin_vals = context->input(3);
  const Tensor& feature_offsets = context->input(4);

  const uint64 full_size = keys.dim_size(0);
  const int64 total_size = static_cast<int64>(full_size);
  TensorShape output_shape = {total_size};

  Tensor* new_keys = nullptr;
  OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &new_keys));
  Tensor* new_vals = nullptr;
  OP_REQUIRES_OK(context, context->allocate_output(1, output_shape, &new_vals));

  try {
    twml::Tensor out_keys_ = TFTensor_to_twml_tensor(*new_keys);
    twml::Tensor out_vals_ = TFTensor_to_twml_tensor(*new_vals);
    const twml::Tensor in_keys_ = TFTensor_to_twml_tensor(keys);
    const twml::Tensor in_vals_ = TFTensor_to_twml_tensor(vals);
    const twml::Tensor bin_ids_ = TFTensor_to_twml_tensor(bin_ids);
    const twml::Tensor bin_vals_ = TFTensor_to_twml_tensor(bin_vals);
    const twml::Tensor feature_offsets_ = TFTensor_to_twml_tensor(feature_offsets);

    // retrieve the thread pool from the op context
    auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());

    // definition of the computation task run on each shard
    auto task = [&](int64 start, int64 limit) {
      twml::discretizerInfer(out_keys_, out_vals_,
                             in_keys_, in_vals_,
                             bin_ids_, bin_vals_,
                             feature_offsets_, output_bits,
                             ID_to_index,
                             start, limit,
                             start);
    };

    // let TensorFlow split up the work as it sees fit
    Shard(worker_threads.num_threads,
          worker_threads.workers,
          full_size,
          static_cast<int64>(cost_per_unit),
          task);
  } catch (const std::exception &e) {
    context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
  }
}
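// Exposition only: Shard() partitions the index range [0, full_size) into
// contiguous [start, limit) chunks and invokes `task` on the device's worker
// threads, using cost_per_unit to decide how finely to split the range; a
// single call task(0, full_size) on one thread would be the serial equivalent.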