"""Light-ranking MLP model definitions built on twml and TensorFlow 1.x."""

import warnings

import tensorflow.compat.v1 as tf
from twml.contrib.layers import ZscoreNormalization

from ...libs.customized_full_sparse import FullSparse
from ...libs.get_feat_config import FEAT_CONFIG_DEFAULT_VAL as MISSING_VALUE_MARKER
from ...libs.model_utils import (
    _sparse_feature_fixup,
    adaptive_transformation,
    filter_nans_and_infs,
    get_dense_out,
    tensor_dropout,
)

# checkstyle: noqa


def light_ranking_mlp_ngbdt(features, is_training, params, label=None):
    """Build the light-ranking MLP without the GBDT sparse-feature branch.

    Thin wrapper around `deepnorm_light_ranking` that pulls the relevant
    hyperparameters out of `params` and sets `use_gbdt=False`.
    """
    return deepnorm_light_ranking(
        features,
        is_training,
        params,
        label=label,
        decay=params.momentum,
        dense_emb_size=params.dense_embedding_size,
        base_activation=tf.keras.layers.LeakyReLU(),
        input_dropout_rate=params.dropout,
        use_gbdt=False,
    )


def deepnorm_light_ranking(
    features,
    is_training,
    params,
    label=None,
    decay=0.99999,
    dense_emb_size=128,
    base_activation=None,
    input_dropout_rate=None,
    input_dense_type="self_atten_dense",
    emb_dense_type="self_atten_dense",
    mlp_dense_type="self_atten_dense",
    use_gbdt=False,
):
    """Build the light-ranking model graph and return its output logits.

    Embeds either the GBDT sparse features (`use_gbdt=True`) or the dense
    continuous plus discrete sparse features (`use_gbdt=False`),
    batch-normalizes the combined embedding, and feeds it through an MLP
    whose depth and widths are controlled by `params`. Returns a dict with
    key "output" holding the logits.
    """
    # --------------------------------------------------------
    # Initial Parameter Checking
    # --------------------------------------------------------
    if base_activation is None:
        base_activation = tf.keras.layers.LeakyReLU()

    if label is not None:
        warnings.warn(
            "label is unused in deepnorm_light_ranking. Stop passing this argument.",
            DeprecationWarning,
        )

    with tf.variable_scope("helper_layers"):
        full_sparse_layer = FullSparse(
            output_size=params.sparse_embedding_size,
            activation=base_activation,
            use_sparse_grads=is_training,
            use_binary_values=False,
            dtype=tf.float32,
        )
        input_normalizing_layer = ZscoreNormalization(
            decay=decay, name="input_normalizing_layer"
        )

    # --------------------------------------------------------
    # Feature Selection & Embedding
    # --------------------------------------------------------
    if use_gbdt:
        sparse_gbdt_features = _sparse_feature_fixup(
            features["gbdt_sparse"], params.input_size_bits
        )
        if input_dropout_rate is not None:
            sparse_gbdt_features = tensor_dropout(
                sparse_gbdt_features, input_dropout_rate, is_training, sparse_tensor=True
            )

        total_embed = full_sparse_layer(sparse_gbdt_features, use_binary_values=True)

        # Rescale the embedding to offset the inputs dropped during training.
        if (input_dropout_rate is not None) and is_training:
            total_embed = total_embed / (1 - input_dropout_rate)

    else:
        with tf.variable_scope("dense_branch"):
            dense_continuous_features = filter_nans_and_infs(features["continuous"])

            if params.use_missing_sub_branch:
                is_missing = tf.equal(dense_continuous_features, MISSING_VALUE_MARKER)
                continuous_features_filled = tf.where(
                    is_missing,
                    tf.zeros_like(dense_continuous_features),
                    dense_continuous_features,
                )
                normalized_features = input_normalizing_layer(
                    continuous_features_filled, is_training, tf.math.logical_not(is_missing)
                )

                with tf.variable_scope("missing_sub_branch"):
                    missing_feature_embed = get_dense_out(
                        tf.cast(is_missing, tf.float32),
                        dense_emb_size,
                        activation=base_activation,
                        dense_type=input_dense_type,
                    )

            else:
                continuous_features_filled = dense_continuous_features
                normalized_features = input_normalizing_layer(
                    continuous_features_filled, is_training
                )

            with tf.variable_scope("continuous_sub_branch"):
                normalized_features = adaptive_transformation(
                    normalized_features, is_training, func_type="tiny"
                )

                if input_dropout_rate is not None:
                    normalized_features = tensor_dropout(
                        normalized_features,
                        input_dropout_rate,
                        is_training,
                        sparse_tensor=False,
                    )
                filled_feature_embed = get_dense_out(
                    normalized_features,
                    dense_emb_size,
                    activation=base_activation,
                    dense_type=input_dense_type,
                )

            if params.use_missing_sub_branch:
                dense_embed = tf.concat(
                    [filled_feature_embed, missing_feature_embed],
                    axis=1,
                    name="merge_dense_emb",
                )
            else:
                dense_embed = filled_feature_embed
with tf.variable_scope("sparse_branch"):
|
|
sparse_discrete_features = _sparse_feature_fixup(
|
|
features["sparse_no_continuous"], params.input_size_bits
|
|
)
|
|
if input_dropout_rate is not None:
|
|
sparse_discrete_features = tensor_dropout(
|
|
sparse_discrete_features, input_dropout_rate, is_training, sparse_tensor=True
|
|
)
|
|
|
|
discrete_features_embed = full_sparse_layer(sparse_discrete_features, use_binary_values=True)
|
|
|
|
if (input_dropout_rate is not None) and is_training:
|
|
discrete_features_embed = discrete_features_embed / (1 - input_dropout_rate)
|
|
|
|
total_embed = tf.concat(
|
|
[dense_embed, discrete_features_embed],
|
|
axis=1,
|
|
name="total_embed",
|
|
)
|
|
|
|

    total_embed = tf.layers.batch_normalization(
        total_embed,
        training=is_training,
        renorm_momentum=decay,
        momentum=decay,
        renorm=is_training,
        trainable=True,
    )

    # --------------------------------------------------------
    # MLP Layers
    # --------------------------------------------------------
    with tf.variable_scope("MLP_branch"):
        assert params.num_mlp_layers >= 0
        embed_list = [total_embed] + [None for _ in range(params.num_mlp_layers)]
        dense_types = [emb_dense_type] + [
            mlp_dense_type for _ in range(params.num_mlp_layers - 1)
        ]

        # Layer widths shrink geometrically, from
        # mlp_neuron_scale ** num_mlp_layers down to mlp_neuron_scale.
        for xl in range(1, params.num_mlp_layers + 1):
            neurons = params.mlp_neuron_scale ** (params.num_mlp_layers + 1 - xl)
            embed_list[xl] = get_dense_out(
                embed_list[xl - 1],
                neurons,
                activation=base_activation,
                dense_type=dense_types[xl - 1],
            )

        if params.task_name in ["Sent", "HeavyRankPosition", "HeavyRankProbability"]:
            logits = get_dense_out(embed_list[-1], 1, activation=None, dense_type=mlp_dense_type)
        else:
            raise ValueError("Invalid task name: %s" % params.task_name)

    output_dict = {"output": logits}
    return output_dict
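

# ----------------------------------------------------------------------------
# Usage sketch (illustrative only, not invoked anywhere in this module). It
# shows the `features` keys and the `params` attributes the functions above
# read. The concrete values, the placeholder shapes, and SimpleNamespace as a
# stand-in for the real params object are assumptions made for the example.
#
#   from types import SimpleNamespace
#
#   params = SimpleNamespace(
#       momentum=0.99999,             # -> decay
#       dense_embedding_size=128,     # -> dense_emb_size
#       dropout=0.2,                  # -> input_dropout_rate
#       sparse_embedding_size=100,
#       input_size_bits=22,
#       use_missing_sub_branch=True,
#       num_mlp_layers=3,
#       mlp_neuron_scale=4,
#       task_name="HeavyRankPosition",
#   )
#   features = {
#       "continuous": tf.placeholder(tf.float32, shape=[None, 100]),
#       "sparse_no_continuous": tf.sparse_placeholder(tf.float32),
#   }
#   outputs = light_ranking_mlp_ngbdt(features, is_training=True, params=params)
#   logits = outputs["output"]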