the-algorithm/pushservice/src/main/python/models/light_ranking/model_pools_mlp.py
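"""Light-ranking MLP model pools for the push service.

Builder functions that assemble the light-ranking MLP graph from continuous and
sparse features, with an optional mode that embeds only GBDT sparse features.
"""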

import warnings

import tensorflow.compat.v1 as tf
from twml.contrib.layers import ZscoreNormalization

from ...libs.customized_full_sparse import FullSparse
from ...libs.get_feat_config import FEAT_CONFIG_DEFAULT_VAL as MISSING_VALUE_MARKER
from ...libs.model_utils import (
  _sparse_feature_fixup,
  adaptive_transformation,
  filter_nans_and_infs,
  get_dense_out,
  tensor_dropout,
)

# checkstyle: noqa


def light_ranking_mlp_ngbdt(features, is_training, params, label=None):
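  """Build the light-ranking MLP without GBDT features.

  Thin wrapper around deepnorm_light_ranking that pulls the normalization decay,
  dense embedding size, and input dropout from `params` and forces use_gbdt=False.
  """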
  return deepnorm_light_ranking(
    features,
    is_training,
    params,
    label=label,
    decay=params.momentum,
    dense_emb_size=params.dense_embedding_size,
    base_activation=tf.keras.layers.LeakyReLU(),
    input_dropout_rate=params.dropout,
    use_gbdt=False,
  )


def deepnorm_light_ranking(
  features,
  is_training,
  params,
  label=None,
  decay=0.99999,
  dense_emb_size=128,
  base_activation=None,
  input_dropout_rate=None,
  input_dense_type="self_atten_dense",
  emb_dense_type="self_atten_dense",
  mlp_dense_type="self_atten_dense",
  use_gbdt=False,
):
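  """Build the deepnorm light-ranking model graph.

  Args:
    features: dict of input tensors (expects "continuous", "sparse_no_continuous",
      and, when use_gbdt is True, "gbdt_sparse").
    is_training: whether the graph is built for training (controls dropout,
      normalization statistics, and sparse-gradient usage).
    params: hyperparameter object (sparse_embedding_size, input_size_bits,
      use_missing_sub_branch, num_mlp_layers, mlp_neuron_scale, task_name, ...).
    label: deprecated and unused.
    decay: moving-average decay for the normalization layers.
    dense_emb_size: output size of the dense-feature embeddings.
    base_activation: activation for embedding and MLP layers (LeakyReLU by default).
    input_dropout_rate: optional dropout rate applied to the input features.
    input_dense_type, emb_dense_type, mlp_dense_type: dense-layer variants passed
      to get_dense_out.
    use_gbdt: if True, embed only the GBDT sparse features.

  Returns:
    dict with key "output" holding the single-logit prediction tensor.
  """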
  # --------------------------------------------------------
  # Initial Parameter Checking
  # --------------------------------------------------------
  if base_activation is None:
    base_activation = tf.keras.layers.LeakyReLU()
  if label is not None:
    warnings.warn(
      "Label is unused in deepnorm_light_ranking. Stop using this argument.",
      DeprecationWarning,
    )
  with tf.variable_scope("helper_layers"):
    full_sparse_layer = FullSparse(
      output_size=params.sparse_embedding_size,
      activation=base_activation,
      use_sparse_grads=is_training,
      use_binary_values=False,
      dtype=tf.float32,
    )
    input_normalizing_layer = ZscoreNormalization(decay=decay, name="input_normalizing_layer")

  # --------------------------------------------------------
  # Feature Selection & Embedding
  # --------------------------------------------------------
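  # With use_gbdt=True only the GBDT sparse features are embedded; otherwise a
  # dense (continuous) branch and a discrete sparse branch are built and concatenated.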
  if use_gbdt:
    sparse_gbdt_features = _sparse_feature_fixup(features["gbdt_sparse"], params.input_size_bits)
    if input_dropout_rate is not None:
      sparse_gbdt_features = tensor_dropout(
        sparse_gbdt_features, input_dropout_rate, is_training, sparse_tensor=True
      )
    total_embed = full_sparse_layer(sparse_gbdt_features, use_binary_values=True)
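    # Inverted-dropout style rescaling: divide by the keep probability during
    # training so the embedding scale matches inference-time behavior.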
    if (input_dropout_rate is not None) and is_training:
      total_embed = total_embed / (1 - input_dropout_rate)
  else:
    with tf.variable_scope("dense_branch"):
      dense_continuous_features = filter_nans_and_infs(features["continuous"])
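      # Optional missing-value sub-branch: zero-fill entries equal to
      # MISSING_VALUE_MARKER, normalize only over the observed entries, and learn
      # a separate embedding of the missing-value mask.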
      if params.use_missing_sub_branch:
        is_missing = tf.equal(dense_continuous_features, MISSING_VALUE_MARKER)
        continuous_features_filled = tf.where(
          is_missing,
          tf.zeros_like(dense_continuous_features),
          dense_continuous_features,
        )
        normalized_features = input_normalizing_layer(
          continuous_features_filled, is_training, tf.math.logical_not(is_missing)
        )
        with tf.variable_scope("missing_sub_branch"):
          missing_feature_embed = get_dense_out(
            tf.cast(is_missing, tf.float32),
            dense_emb_size,
            activation=base_activation,
            dense_type=input_dense_type,
          )
      else:
        continuous_features_filled = dense_continuous_features
        normalized_features = input_normalizing_layer(continuous_features_filled, is_training)

with tf.variable_scope("continuous_sub_branch"):
normalized_features = adaptive_transformation(
normalized_features, is_training, func_type="tiny"
)
if input_dropout_rate is not None:
normalized_features = tensor_dropout(
normalized_features,
input_dropout_rate,
is_training,
sparse_tensor=False,
)
filled_feature_embed = get_dense_out(
normalized_features,
dense_emb_size,
activation=base_activation,
dense_type=input_dense_type,
)
      if params.use_missing_sub_branch:
        dense_embed = tf.concat(
          [filled_feature_embed, missing_feature_embed], axis=1, name="merge_dense_emb"
        )
      else:
        dense_embed = filled_feature_embed

with tf.variable_scope("sparse_branch"):
sparse_discrete_features = _sparse_feature_fixup(
features["sparse_no_continuous"], params.input_size_bits
)
if input_dropout_rate is not None:
sparse_discrete_features = tensor_dropout(
sparse_discrete_features, input_dropout_rate, is_training, sparse_tensor=True
)
discrete_features_embed = full_sparse_layer(sparse_discrete_features, use_binary_values=True)
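      # Same inverted-dropout rescaling as in the GBDT branch above.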
      if (input_dropout_rate is not None) and is_training:
        discrete_features_embed = discrete_features_embed / (1 - input_dropout_rate)

    total_embed = tf.concat(
      [dense_embed, discrete_features_embed],
      axis=1,
      name="total_embed",
    )

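  # Batch-normalize the combined embedding; batch renorm is enabled only during training.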
  total_embed = tf.layers.batch_normalization(
    total_embed,
    training=is_training,
    renorm_momentum=decay,
    momentum=decay,
    renorm=is_training,
    trainable=True,
  )

  # --------------------------------------------------------
  # MLP Layers
  # --------------------------------------------------------
  with tf.variable_scope("MLP_branch"):
    assert params.num_mlp_layers >= 0
    embed_list = [total_embed] + [None for _ in range(params.num_mlp_layers)]
    dense_types = [emb_dense_type] + [mlp_dense_type for _ in range(params.num_mlp_layers - 1)]
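    # Hidden layer xl has mlp_neuron_scale ** (num_mlp_layers + 1 - xl) units, so the
    # widths shrink geometrically from the embedding toward the output logit.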
    for xl in range(1, params.num_mlp_layers + 1):
      neurons = params.mlp_neuron_scale ** (params.num_mlp_layers + 1 - xl)
      embed_list[xl] = get_dense_out(
        embed_list[xl - 1], neurons, activation=base_activation, dense_type=dense_types[xl - 1]
      )
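    # Every supported task uses a single-logit head; unknown task names fail fast.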
    if params.task_name in ["Sent", "HeavyRankPosition", "HeavyRankProbability"]:
      logits = get_dense_out(embed_list[-1], 1, activation=None, dense_type=mlp_dense_type)
    else:
      raise ValueError(f"Invalid task name: {params.task_name}")

  output_dict = {"output": logits}
  return output_dict