the-algorithm/pushservice/src/main/python/models/light_ranking/model_pools_mlp.py
twitter-team b389c3d302 Open-sourcing pushservice
Pushservice is the main recommendation service we use to surface recommendations to our users via notifications. It fetches candidates from various sources, ranks them in order of relevance, and applies filters to determine the best one to send.
2023-05-19 16:27:07 -05:00


import warnings

from twml.contrib.layers import ZscoreNormalization

from ...libs.customized_full_sparse import FullSparse
from ...libs.get_feat_config import FEAT_CONFIG_DEFAULT_VAL as MISSING_VALUE_MARKER
from ...libs.model_utils import (
  _sparse_feature_fixup,
  adaptive_transformation,
  filter_nans_and_infs,
  get_dense_out,
  tensor_dropout,
)

import tensorflow.compat.v1 as tf

# checkstyle: noqa


def light_ranking_mlp_ngbdt(features, is_training, params, label=None):
  return deepnorm_light_ranking(
    features,
    is_training,
    params,
    label=label,
    decay=params.momentum,
    dense_emb_size=params.dense_embedding_size,
    base_activation=tf.keras.layers.LeakyReLU(),
    input_dropout_rate=params.dropout,
    use_gbdt=False,
  )
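

# A minimal sketch of the hyperparameters this module reads off `params`. Only the
# attribute names come from the code below; the container type and the example values
# are illustrative assumptions.
#
#   from argparse import Namespace
#   params = Namespace(
#     momentum=0.99999,             # passed as `decay` to the normalization layers
#     dense_embedding_size=128,     # passed as `dense_emb_size`
#     dropout=0.1,                  # passed as `input_dropout_rate`
#     sparse_embedding_size=100,    # output size of the shared FullSparse layer
#     input_size_bits=20,           # hash-space size for sparse feature ids
#     use_missing_sub_branch=True,  # enable the missing-value sub-branch
#     num_mlp_layers=3,             # number of hidden MLP layers
#     mlp_neuron_scale=4,           # layer xl gets mlp_neuron_scale ** (num_mlp_layers + 1 - xl) units
#     task_name="Sent",             # one of: "Sent", "HeavyRankPosition", "HeavyRankProbability"
#   )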


def deepnorm_light_ranking(
  features,
  is_training,
  params,
  label=None,
  decay=0.99999,
  dense_emb_size=128,
  base_activation=None,
  input_dropout_rate=None,
  input_dense_type="self_atten_dense",
  emb_dense_type="self_atten_dense",
  mlp_dense_type="self_atten_dense",
  use_gbdt=False,
):
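  """Build the light-ranking MLP graph.

  A summary inferred from the code below: continuous features are filtered,
  normalized, and embedded in a dense branch (optionally with a parallel
  sub-branch that embeds the missing-value pattern), sparse features are
  embedded with a shared FullSparse layer, and the concatenated embedding is
  batch-normalized and passed through `params.num_mlp_layers` dense layers
  down to a single logit.

  Returns:
    dict with a single key "output" holding the logit tensor.
  """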
  # --------------------------------------------------------
  # Initial Parameter Checking
  # --------------------------------------------------------
  if base_activation is None:
    base_activation = tf.keras.layers.LeakyReLU()

  if label is not None:
    warnings.warn(
      "Label is unused in deepnorm_light_ranking. Stop using this argument.",
      DeprecationWarning,
    )

  with tf.variable_scope("helper_layers"):
    full_sparse_layer = FullSparse(
      output_size=params.sparse_embedding_size,
      activation=base_activation,
      use_sparse_grads=is_training,
      use_binary_values=False,
      dtype=tf.float32,
    )
    input_normalizing_layer = ZscoreNormalization(decay=decay, name="input_normalizing_layer")
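
  # When use_gbdt is True, the dense/sparse branches below are skipped and the hashed
  # GBDT sparse features are embedded directly with the same FullSparse helper layer.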
  # --------------------------------------------------------
  # Feature Selection & Embedding
  # --------------------------------------------------------
  if use_gbdt:
    sparse_gbdt_features = _sparse_feature_fixup(features["gbdt_sparse"], params.input_size_bits)
    if input_dropout_rate is not None:
      sparse_gbdt_features = tensor_dropout(
        sparse_gbdt_features, input_dropout_rate, is_training, sparse_tensor=True
      )

    total_embed = full_sparse_layer(sparse_gbdt_features, use_binary_values=True)
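
    # Train-time rescaling by 1 / (1 - rate): presumably inverted-dropout style
    # compensation for the inputs removed by tensor_dropout above, so the embedding
    # keeps a comparable expected magnitude at inference time.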
    if (input_dropout_rate is not None) and is_training:
      total_embed = total_embed / (1 - input_dropout_rate)

  else:
    with tf.variable_scope("dense_branch"):
      dense_continuous_features = filter_nans_and_infs(features["continuous"])
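
      # Missing-value handling: entries equal to MISSING_VALUE_MARKER are zero-filled
      # before normalization (the normalizer gets a validity mask), and the binary
      # missingness pattern itself is embedded in a separate sub-branch that is
      # concatenated back onto the dense embedding below.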
      if params.use_missing_sub_branch:
        is_missing = tf.equal(dense_continuous_features, MISSING_VALUE_MARKER)
        continuous_features_filled = tf.where(
          is_missing,
          tf.zeros_like(dense_continuous_features),
          dense_continuous_features,
        )
        normalized_features = input_normalizing_layer(
          continuous_features_filled, is_training, tf.math.logical_not(is_missing)
        )

        with tf.variable_scope("missing_sub_branch"):
          missing_feature_embed = get_dense_out(
            tf.cast(is_missing, tf.float32),
            dense_emb_size,
            activation=base_activation,
            dense_type=input_dense_type,
          )

      else:
        continuous_features_filled = dense_continuous_features
        normalized_features = input_normalizing_layer(continuous_features_filled, is_training)

      with tf.variable_scope("continuous_sub_branch"):
        normalized_features = adaptive_transformation(
          normalized_features, is_training, func_type="tiny"
        )

        if input_dropout_rate is not None:
          normalized_features = tensor_dropout(
            normalized_features,
            input_dropout_rate,
            is_training,
            sparse_tensor=False,
          )
        filled_feature_embed = get_dense_out(
          normalized_features,
          dense_emb_size,
          activation=base_activation,
          dense_type=input_dense_type,
        )

      if params.use_missing_sub_branch:
        dense_embed = tf.concat(
          [filled_feature_embed, missing_feature_embed], axis=1, name="merge_dense_emb"
        )
      else:
        dense_embed = filled_feature_embed
with tf.variable_scope("sparse_branch"):
sparse_discrete_features = _sparse_feature_fixup(
features["sparse_no_continuous"], params.input_size_bits
)
if input_dropout_rate is not None:
sparse_discrete_features = tensor_dropout(
sparse_discrete_features, input_dropout_rate, is_training, sparse_tensor=True
)
discrete_features_embed = full_sparse_layer(sparse_discrete_features, use_binary_values=True)
if (input_dropout_rate is not None) and is_training:
discrete_features_embed = discrete_features_embed / (1 - input_dropout_rate)
total_embed = tf.concat(
[dense_embed, discrete_features_embed],
axis=1,
name="total_embed",
)
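
  # Shared post-embedding normalization for both paths; renormalization is enabled
  # only while training (renorm=is_training) and uses the same decay as the z-score
  # normalizer above.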
  total_embed = tf.layers.batch_normalization(
    total_embed,
    training=is_training,
    renorm_momentum=decay,
    momentum=decay,
    renorm=is_training,
    trainable=True,
  )

  # --------------------------------------------------------
  # MLP Layers
  # --------------------------------------------------------
  with tf.variable_scope("MLP_branch"):

    assert params.num_mlp_layers >= 0
    embed_list = [total_embed] + [None for _ in range(params.num_mlp_layers)]
    dense_types = [emb_dense_type] + [mlp_dense_type for _ in range(params.num_mlp_layers - 1)]
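
    # Layer widths shrink geometrically toward the output: hidden layer xl has
    # mlp_neuron_scale ** (num_mlp_layers + 1 - xl) units, so e.g. with scale 4 and
    # 3 layers the widths are 64, 16, 4 before the final 1-unit logit layer.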
    for xl in range(1, params.num_mlp_layers + 1):
      neurons = params.mlp_neuron_scale ** (params.num_mlp_layers + 1 - xl)
      embed_list[xl] = get_dense_out(
        embed_list[xl - 1], neurons, activation=base_activation, dense_type=dense_types[xl - 1]
      )

    if params.task_name in ["Sent", "HeavyRankPosition", "HeavyRankProbability"]:
      logits = get_dense_out(embed_list[-1], 1, activation=None, dense_type=mlp_dense_type)

    else:
      raise ValueError(f"Invalid task name: {params.task_name}")

  output_dict = {"output": logits}
  return output_dict
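

# A usage sketch: the trainer wiring around this call is an assumption; only the
# feature keys, the `is_training` / `params` arguments, and the shape of the returned
# dict come from the code above.
#
#   logits_dict = light_ranking_mlp_ngbdt(
#     features={
#       "continuous": dense_tensor,             # float32 [batch, num_continuous]
#       "sparse_no_continuous": sparse_tensor,  # tf.SparseTensor of hashed feature ids
#     },
#     is_training=True,
#     params=params,  # e.g. the Namespace sketched near the top of this file
#   )
#   logits = logits_dict["output"]  # unnormalized score, one per example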