# the-algorithm/pushservice/src/main/python/models/libs/model_utils.py

import sys

import tensorflow.compat.v1 as tf
import twml
import yaml

from .initializer import customized_glorot_uniform

# checkstyle: noqa


def read_config(whitelist_yaml_file):
  """Loads a YAML config (e.g. a feature whitelist); exits the process on parse errors."""
  with tf.gfile.FastGFile(whitelist_yaml_file) as f:
    try:
      return yaml.safe_load(f)
    except yaml.YAMLError as exc:
      print(exc)
      sys.exit(1)
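
# Usage sketch (the path and key below are hypothetical, not from this module):
# load a feature whitelist once at model-construction time.
#
#   config = read_config("feature_whitelist.yaml")
#   dense_features = config["dense_features"]

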
def _sparse_feature_fixup(features, input_size_bits):
  """Rebuild a sparse tensor feature so that its dense shape attribute is present.

  Arguments:
    features (SparseTensor): Sparse feature tensor of shape ``(B, sparse_feature_dim)``.
    input_size_bits (int): Number of columns in ``log2`` scale. Must be positive.

  Returns:
    SparseTensor: Rebuilt and non-faulty version of `features`."""
  sparse_feature_dim = tf.constant(2**input_size_bits, dtype=tf.int64)
  sparse_shape = tf.stack([features.dense_shape[0], sparse_feature_dim])
  sparse_tf = tf.SparseTensor(features.indices, features.values, sparse_shape)
  return sparse_tf
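
# A minimal sketch of what the fixup does, assuming input_size_bits=2 (so the
# feature space has 2**2 = 4 columns); the tensors are illustrative only. A
# batch whose largest observed index is 1 could otherwise carry a dense_shape
# of [B, 2] instead of the true [B, 4].
#
#   st = tf.SparseTensor(indices=[[0, 0], [1, 1]], values=[1.0, 2.0],
#                        dense_shape=[2, 2])
#   fixed = _sparse_feature_fixup(st, input_size_bits=2)
#   # fixed.dense_shape == [2, 4], independent of the indices observed.

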
def self_atten_dense(input, out_dim, activation=None, use_bias=True, name=None):
  """Gated dense layer: a sigmoid gate reweights the input before an MLP."""

  def safe_concat(base, suffix):
    """Concatenates variable name components if base is given."""
    if not base:
      return base
    return f"{base}:{suffix}"

  input_dim = input.shape.as_list()[1]

  sigmoid_out = twml.layers.FullDense(
    input_dim, dtype=tf.float32, activation=tf.nn.sigmoid, name=safe_concat(name, "sigmoid_out")
  )(input)
  atten_input = sigmoid_out * input
  mlp_out = twml.layers.FullDense(
    out_dim,
    dtype=tf.float32,
    activation=activation,
    use_bias=use_bias,
    name=safe_concat(name, "mlp_out"),
  )(atten_input)
  return mlp_out
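
# Usage sketch (shapes are assumptions for illustration): the layer maps a
# (batch, input_dim) tensor to (batch, out_dim) after self-gating.
#
#   x = tf.placeholder(tf.float32, shape=[None, 128])
#   y = self_atten_dense(x, out_dim=64, activation=tf.nn.relu, name="atten")
#   # y has shape (batch, 64); the sigmoid gate has shape (batch, 128).

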
def get_dense_out(input, out_dim, activation, dense_type):
  """Applies the dense layer selected by ``dense_type`` to ``input``."""
  if dense_type == "full_dense":
    out = twml.layers.FullDense(out_dim, dtype=tf.float32, activation=activation)(input)
  elif dense_type == "self_atten_dense":
    out = self_atten_dense(input, out_dim, activation=activation)
  else:
    raise ValueError(f"Unknown dense_type: {dense_type}")
  return out
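
# Dispatch sketch (the two-layer stack below is hypothetical): dense_type lets
# a config string pick between a plain FullDense and the gated variant above.
#
#   hidden = get_dense_out(x, 256, activation=tf.nn.relu, dense_type="full_dense")
#   logits = get_dense_out(hidden, 1, activation=None, dense_type="self_atten_dense")

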
def get_input_trans_func(bn_normalized_dense, is_training):
  """Feature-wise transform: three GroupWiseTrans layers (1 -> 8 -> 4 -> 1)
  followed by batch normalization."""
  gw_normalized_dense = tf.expand_dims(bn_normalized_dense, -1)
  group_num = bn_normalized_dense.shape.as_list()[1]

  gw_normalized_dense = GroupWiseTrans(group_num, 1, 8, name="groupwise_1", activation=tf.tanh)(
    gw_normalized_dense
  )
  gw_normalized_dense = GroupWiseTrans(group_num, 8, 4, name="groupwise_2", activation=tf.tanh)(
    gw_normalized_dense
  )
  gw_normalized_dense = GroupWiseTrans(group_num, 4, 1, name="groupwise_3", activation=tf.tanh)(
    gw_normalized_dense
  )
  gw_normalized_dense = tf.squeeze(gw_normalized_dense, [-1])

  bn_gw_normalized_dense = tf.layers.batch_normalization(
    gw_normalized_dense,
    training=is_training,
    renorm_momentum=0.9999,
    momentum=0.9999,
    renorm=is_training,
    trainable=True,
  )
  return bn_gw_normalized_dense
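
# Shape walk-through (a sketch; batch size B and feature count D are assumed):
#
#   bn_normalized_dense: (B, D)
#   after expand_dims:   (B, D, 1)   each scalar feature becomes its own group
#   groupwise_1/2/3:     (B, D, 8) -> (B, D, 4) -> (B, D, 1)
#   after squeeze:       (B, D)     one transformed scalar per input feature

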
def tensor_dropout(
  input_tensor,
  rate,
  is_training,
  sparse_tensor=None,
):
  """
  Implements a dropout layer for both dense and sparse input_tensor.

  Arguments:
    input_tensor:
      B x D dense tensor, or a sparse tensor.
    rate (float32):
      dropout rate.
    is_training (bool):
      whether we are in the training stage.
    sparse_tensor (bool):
      whether input_tensor is a sparse tensor. Defaults to None; this value
      has to be passed explicitly.

  Returns:
    tensor with dropout applied. Note that the sparse branch drops entries
    without rescaling the kept values, while the dense branch uses
    tf.layers.dropout, which rescales kept values by 1 / (1 - rate)."""
  if sparse_tensor is True:
    if is_training:
      with tf.variable_scope("sparse_dropout"):
        values = input_tensor.values
        keep_mask = tf.keras.backend.random_binomial(
          tf.shape(values), p=1 - rate, dtype=tf.float32, seed=None
        )
        keep_mask.set_shape([None])
        keep_mask = tf.cast(keep_mask, tf.bool)
        keep_indices = tf.boolean_mask(input_tensor.indices, keep_mask, axis=0)
        keep_values = tf.boolean_mask(values, keep_mask, axis=0)
        dropped_tensor = tf.SparseTensor(keep_indices, keep_values, input_tensor.dense_shape)
        return dropped_tensor
    else:
      return input_tensor
  elif sparse_tensor is False:
    return tf.layers.dropout(input_tensor, rate=rate, training=is_training)
  else:
    raise ValueError("sparse_tensor must be explicitly set to True or False")
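
# Sketch of the sparse branch on a toy tensor (values are assumptions): with
# rate=0.5, each nonzero entry is kept independently with probability 0.5.
#
#   st = tf.sparse.from_dense([[1.0, 0.0], [0.0, 2.0]])
#   dropped = tensor_dropout(st, rate=0.5, is_training=True, sparse_tensor=True)
#   # dropped keeps a random subset of st's (index, value) pairs; the
#   # dense_shape is unchanged and kept values are not rescaled.

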
def adaptive_transformation(bn_normalized_dense, is_training, func_type="default"):
  """Feature-wise transform built from FastGroupWiseTrans layers; ``func_type``
  selects the default (1 -> 8 -> 4 -> 1) or tiny (1 -> 2 -> 1 -> 1) stack."""
  assert func_type in [
    "default",
    "tiny",
  ], f"func_type can only be one of default and tiny, but got {func_type}"
  gw_normalized_dense = tf.expand_dims(bn_normalized_dense, -1)
  group_num = bn_normalized_dense.shape.as_list()[1]

  if func_type == "default":
    gw_normalized_dense = FastGroupWiseTrans(
      group_num, 1, 8, name="groupwise_1", activation=tf.tanh, init_multiplier=8
    )(gw_normalized_dense)
    gw_normalized_dense = FastGroupWiseTrans(
      group_num, 8, 4, name="groupwise_2", activation=tf.tanh, init_multiplier=8
    )(gw_normalized_dense)
    gw_normalized_dense = FastGroupWiseTrans(
      group_num, 4, 1, name="groupwise_3", activation=tf.tanh, init_multiplier=8
    )(gw_normalized_dense)
  elif func_type == "tiny":
    gw_normalized_dense = FastGroupWiseTrans(
      group_num, 1, 2, name="groupwise_1", activation=tf.tanh, init_multiplier=8
    )(gw_normalized_dense)
    gw_normalized_dense = FastGroupWiseTrans(
      group_num, 2, 1, name="groupwise_2", activation=tf.tanh, init_multiplier=8
    )(gw_normalized_dense)
    gw_normalized_dense = FastGroupWiseTrans(
      group_num, 1, 1, name="groupwise_3", activation=tf.tanh, init_multiplier=8
    )(gw_normalized_dense)

  gw_normalized_dense = tf.squeeze(gw_normalized_dense, [-1])
  bn_gw_normalized_dense = tf.layers.batch_normalization(
    gw_normalized_dense,
    training=is_training,
    renorm_momentum=0.9999,
    momentum=0.9999,
    renorm=is_training,
    trainable=True,
  )
  return bn_gw_normalized_dense
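
# The "tiny" stack trades capacity for parameters (arithmetic only; biases
# ignored): per feature it learns 1*2 + 2*1 + 1*1 = 5 weights instead of the
# default 1*8 + 8*4 + 4*1 = 44, roughly 9x fewer weights overall.

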
class FastGroupWiseTrans(object):
  """
  Used to apply group-wise fully connected layers to the input.
  It applies a tiny, unique MLP to each individual feature, implemented as a
  single broadcasted multiply-and-sum."""

  def __init__(self, group_num, input_dim, out_dim, name, activation=None, init_multiplier=1):
    self.group_num = group_num
    self.input_dim = input_dim
    self.out_dim = out_dim
    self.activation = activation
    self.init_multiplier = init_multiplier

    self.w = tf.get_variable(
      name + "_group_weight",
      [1, group_num, input_dim, out_dim],
      initializer=customized_glorot_uniform(
        fan_in=input_dim * init_multiplier, fan_out=out_dim * init_multiplier
      ),
      trainable=True,
    )
    self.b = tf.get_variable(
      name + "_group_bias",
      [1, group_num, out_dim],
      initializer=tf.constant_initializer(0.0),
      trainable=True,
    )

  def __call__(self, input_tensor):
    """
    input_tensor: batch_size x group_num x input_dim
    output_tensor: batch_size x group_num x out_dim"""
    input_tensor_expand = tf.expand_dims(input_tensor, axis=-1)
    output_tensor = tf.add(
      tf.reduce_sum(tf.multiply(input_tensor_expand, self.w), axis=-2, keepdims=False),
      self.b,
    )
    if self.activation is not None:
      output_tensor = self.activation(output_tensor)
    return output_tensor
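
# Equivalent einsum form (a sketch, not used by the class): the broadcast
# multiply-and-sum above computes, for each group g, x[:, g, :] @ w[0, g, :, :].
#
#   # output[b, g, o] = sum_i input[b, g, i] * w[0, g, i, o] + b[0, g, o]
#   # i.e. tf.einsum("bgi,zgio->bgo", input_tensor, w) + b

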
class GroupWiseTrans(object):
  """
  Used to apply group-wise fully connected layers to the input, with one
  weight and bias variable per output dimension.
  """

  def __init__(self, group_num, input_dim, out_dim, name, activation=None):
    self.group_num = group_num
    self.input_dim = input_dim
    self.out_dim = out_dim
    self.activation = activation

    w_list, b_list = [], []
    for idx in range(out_dim):
      this_w = tf.get_variable(
        name + f"_group_weight_{idx}",
        [1, group_num, input_dim],
        initializer=tf.keras.initializers.glorot_uniform(),
        trainable=True,
      )
      this_b = tf.get_variable(
        name + f"_group_bias_{idx}",
        [1, group_num, 1],
        initializer=tf.constant_initializer(0.0),
        trainable=True,
      )
      w_list.append(this_w)
      b_list.append(this_b)
    self.w_list = w_list
    self.b_list = b_list

  def __call__(self, input_tensor):
    """
    input_tensor: batch_size x group_num x input_dim
    output_tensor: batch_size x group_num x out_dim
    """
    out_tensor_list = []
    for idx in range(self.out_dim):
      this_res = (
        tf.reduce_sum(input_tensor * self.w_list[idx], axis=-1, keepdims=True) + self.b_list[idx]
      )
      out_tensor_list.append(this_res)
    output_tensor = tf.concat(out_tensor_list, axis=-1)
    if self.activation is not None:
      output_tensor = self.activation(output_tensor)
    return output_tensor
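
# FastGroupWiseTrans and GroupWiseTrans compute the same function up to
# initialization: the Fast variant holds one [1, G, I, O] weight, while this
# one holds O separate [1, G, I] weights. A usage sketch (shapes assumed):
#
#   layer = GroupWiseTrans(group_num=50, input_dim=4, out_dim=1, name="gw")
#   y = layer(x)  # x: (B, 50, 4) -> y: (B, 50, 1)

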
def add_scalar_summary(var, name, name_scope="hist_dense_feature/"):
  """Records ``var`` as a scalar summary under summaries/<name_scope>."""
  with tf.name_scope("summaries/"):
    with tf.name_scope(name_scope):
      tf.summary.scalar(name, var)


def add_histogram_summary(var, name, name_scope="hist_dense_feature/"):
  """Records ``var`` (flattened) as a histogram summary under summaries/<name_scope>."""
  with tf.name_scope("summaries/"):
    with tf.name_scope(name_scope):
      tf.summary.histogram(name, tf.reshape(var, [-1]))


def sparse_clip_by_value(sparse_tf, min_val, max_val):
  """Clips the values of a SparseTensor to the range [min_val, max_val]."""
  new_vals = tf.clip_by_value(sparse_tf.values, min_val, max_val)
  return tf.SparseTensor(sparse_tf.indices, new_vals, sparse_tf.dense_shape)


def check_numerics_with_msg(tensor, message="", sparse_tensor=False):
  """Asserts that ``tensor`` (dense or sparse) contains no NaN or Inf values."""
  if sparse_tensor:
    values = tf.debugging.check_numerics(tensor.values, message=message)
    return tf.SparseTensor(tensor.indices, values, tensor.dense_shape)
  else:
    return tf.debugging.check_numerics(tensor, message=message)


def pad_empty_sparse_tensor(tensor):
  """Replaces an empty SparseTensor with a dummy one holding a single tiny
  value at position (0, 0), so downstream ops never see zero entries."""
  dummy_tensor = tf.SparseTensor(
    indices=[[0, 0]],
    values=[0.00001],
    dense_shape=tensor.dense_shape,
  )
  result = tf.cond(
    tf.equal(tf.size(tensor.values), 0),
    lambda: dummy_tensor,
    lambda: tensor,
  )
  return result
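
# Why the padding matters (an assumption about the call sites, which are not
# in this file): some sparse ops and custom layers can misbehave on a batch
# where no feature fired at all; injecting one near-zero value keeps the graph
# well-defined without materially affecting results.

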
def filter_nans_and_infs(tensor, sparse_tensor=False):
  """Replaces NaN and Inf entries of a dense or sparse tensor with zeros."""
  if sparse_tensor:
    sparse_values = tensor.values
    filtered_val = tf.where(
      tf.logical_or(tf.is_nan(sparse_values), tf.is_inf(sparse_values)),
      tf.zeros_like(sparse_values),
      sparse_values,
    )
    return tf.SparseTensor(tensor.indices, filtered_val, tensor.dense_shape)
  else:
    return tf.where(
      tf.logical_or(tf.is_nan(tensor), tf.is_inf(tensor)), tf.zeros_like(tensor), tensor
    )
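
# Contrast with check_numerics_with_msg above (an observation, not from the
# source): that helper fails fast on bad values, while this one silently
# zeroes them out; the former suits debugging, the latter serving-time
# robustness.
#
#   clean = filter_nans_and_infs(tf.constant([1.0, float("nan"), float("inf")]))
#   # -> [1.0, 0.0, 0.0]

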
def generate_disliked_mask(labels):
  """Generates a disliked mask where only samples with dislike labels are set
  to True, and all others to False.

  Args:
    labels: labels of training samples, a 2D tensor of shape batch_size x 3:
      [OONCs, engagements, dislikes]

  Returns:
    2D boolean tensor of shape batch_size x 1: [dislikes]
  """
  return tf.equal(tf.reshape(labels[:, 2], shape=[-1, 1]), 1)
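
# Example (label values are assumptions for illustration): a batch of three
# samples where only the second has a dislike label.
#
#   labels = tf.constant([[1, 0, 0], [0, 0, 1], [0, 1, 0]])
#   mask = generate_disliked_mask(labels)
#   # -> [[False], [True], [False]], suitable for tf.where / loss masking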